@clazic/kordoc 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TS3F57LY.js → chunk-3FTA6V7S.js} +394 -67
- package/dist/chunk-3FTA6V7S.js.map +1 -0
- package/dist/{chunk-TND4YFBV.js → chunk-USE7IDLV.js} +2 -2
- package/dist/cli.js +46 -11
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +551 -231
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -9
- package/dist/index.d.ts +71 -9
- package/dist/index.js +537 -217
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-F66K7PXH.js → utils-XYBJBWM2.js} +2 -2
- package/dist/{watch-2S5ULHAM.js → watch-CJRS6OYE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-TS3F57LY.js.map +0 -1
- /package/dist/{chunk-TND4YFBV.js.map → chunk-USE7IDLV.js.map} +0 -0
- /package/dist/{utils-F66K7PXH.js.map → utils-XYBJBWM2.js.map} +0 -0
- /package/dist/{watch-2S5ULHAM.js.map → watch-CJRS6OYE.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -33,118 +33,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
33
33
|
));
|
|
34
34
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
35
35
|
|
|
36
|
-
// src/utils.ts
|
|
37
|
-
var utils_exports = {};
|
|
38
|
-
__export(utils_exports, {
|
|
39
|
-
KordocError: () => KordocError,
|
|
40
|
-
VERSION: () => VERSION,
|
|
41
|
-
classifyError: () => classifyError,
|
|
42
|
-
isPathTraversal: () => isPathTraversal,
|
|
43
|
-
normalizeKordocError: () => normalizeKordocError,
|
|
44
|
-
precheckZipSize: () => precheckZipSize,
|
|
45
|
-
sanitizeError: () => sanitizeError,
|
|
46
|
-
sanitizeHref: () => sanitizeHref,
|
|
47
|
-
toArrayBuffer: () => toArrayBuffer
|
|
48
|
-
});
|
|
49
|
-
function toArrayBuffer(buf) {
|
|
50
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
51
|
-
return buf.buffer;
|
|
52
|
-
}
|
|
53
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
54
|
-
}
|
|
55
|
-
function sanitizeError(err) {
|
|
56
|
-
if (err instanceof KordocError) return err.message;
|
|
57
|
-
return "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
|
|
58
|
-
}
|
|
59
|
-
function isPathTraversal(name) {
|
|
60
|
-
if (name.includes("\0")) return true;
|
|
61
|
-
const normalized = name.replace(/\\/g, "/");
|
|
62
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
63
|
-
}
|
|
64
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
65
|
-
try {
|
|
66
|
-
const data = new DataView(buffer);
|
|
67
|
-
const len = buffer.byteLength;
|
|
68
|
-
let eocdOffset = -1;
|
|
69
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
70
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
71
|
-
eocdOffset = i;
|
|
72
|
-
break;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
76
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
77
|
-
if (entryCount > maxEntries) {
|
|
78
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
79
|
-
}
|
|
80
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
81
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
82
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
83
|
-
let totalUncompressed = 0;
|
|
84
|
-
let pos = cdOffset;
|
|
85
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
86
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
87
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
88
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
89
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
90
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
91
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
92
|
-
}
|
|
93
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
94
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
95
|
-
}
|
|
96
|
-
return { totalUncompressed, entryCount };
|
|
97
|
-
} catch (err) {
|
|
98
|
-
if (err instanceof KordocError) throw err;
|
|
99
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
function sanitizeHref(href) {
|
|
103
|
-
const trimmed = href.trim();
|
|
104
|
-
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
105
|
-
return trimmed;
|
|
106
|
-
}
|
|
107
|
-
function classifyError(err) {
|
|
108
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
109
|
-
const msg = err.message;
|
|
110
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
111
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
112
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
113
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
114
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
115
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
116
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
117
|
-
return "PARSE_ERROR";
|
|
118
|
-
}
|
|
119
|
-
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
|
|
120
|
-
if (err instanceof KordocError) {
|
|
121
|
-
if (!err.stage) err.stage = stage;
|
|
122
|
-
if (!err.code) err.code = fallbackCode;
|
|
123
|
-
return err;
|
|
124
|
-
}
|
|
125
|
-
const message = err instanceof Error ? err.message : fallbackMessage;
|
|
126
|
-
const code = err instanceof Error ? classifyError(err) : fallbackCode;
|
|
127
|
-
return new KordocError(message || fallbackMessage, { code, stage });
|
|
128
|
-
}
|
|
129
|
-
var VERSION, KordocError, SAFE_HREF_RE;
|
|
130
|
-
var init_utils = __esm({
|
|
131
|
-
"src/utils.ts"() {
|
|
132
|
-
"use strict";
|
|
133
|
-
VERSION = true ? "2.5.2" : "0.0.0-dev";
|
|
134
|
-
KordocError = class extends Error {
|
|
135
|
-
code;
|
|
136
|
-
stage;
|
|
137
|
-
constructor(message, opts = {}) {
|
|
138
|
-
super(message);
|
|
139
|
-
this.name = "KordocError";
|
|
140
|
-
this.code = opts.code;
|
|
141
|
-
this.stage = opts.stage;
|
|
142
|
-
}
|
|
143
|
-
};
|
|
144
|
-
SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
145
|
-
}
|
|
146
|
-
});
|
|
147
|
-
|
|
148
36
|
// src/page-range.ts
|
|
149
37
|
var page_range_exports = {};
|
|
150
38
|
__export(page_range_exports, {
|
|
@@ -3196,7 +3084,7 @@ __export(index_exports, {
|
|
|
3196
3084
|
runUnifiedOcrPipeline: () => runUnifiedOcrPipeline
|
|
3197
3085
|
});
|
|
3198
3086
|
module.exports = __toCommonJS(index_exports);
|
|
3199
|
-
var
|
|
3087
|
+
var import_promises5 = require("fs/promises");
|
|
3200
3088
|
|
|
3201
3089
|
// src/detect.ts
|
|
3202
3090
|
var import_jszip = __toESM(require("jszip"), 1);
|
|
@@ -3248,8 +3136,97 @@ async function detectZipFormat(buffer) {
|
|
|
3248
3136
|
var import_jszip2 = __toESM(require("jszip"), 1);
|
|
3249
3137
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3250
3138
|
|
|
3139
|
+
// src/utils.ts
|
|
3140
|
+
// Version string reported by this build. The original expression was a
// ternary on a constant `true`, so only this branch was ever reachable.
var VERSION = "2.6.1";
|
|
3141
|
+
/**
 * Return an ArrayBuffer containing exactly the bytes visible through `buf`.
 *
 * When the view spans its whole backing buffer the backing buffer itself is
 * returned (no copy); otherwise the viewed range is copied out with `slice`.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf
 *   A typed-array-like view (e.g. a Node `Buffer` or `Uint8Array`).
 * @returns {ArrayBuffer}
 */
function toArrayBuffer(buf) {
  const { buffer, byteOffset, byteLength } = buf;
  const spansWholeBuffer = byteOffset === 0 && byteLength === buffer.byteLength;
  return spansWholeBuffer ? buffer : buffer.slice(byteOffset, byteOffset + byteLength);
}
|
|
3147
|
+
/**
 * Error type raised by kordoc itself. Besides the message it carries an
 * optional machine-readable `code` and the pipeline `stage` it came from.
 */
var KordocError = class extends Error {
  code;
  stage;
  /**
   * @param {string} message Human-readable description.
   * @param {{code?: string, stage?: string}} [opts] Optional classification.
   */
  constructor(message, opts = {}) {
    super(message);
    const { code, stage } = opts;
    this.name = "KordocError";
    this.code = code;
    this.stage = stage;
  }
};
|
|
3157
|
+
/**
 * Conservative check for archive entry names that could escape the
 * extraction root.
 *
 * A name is rejected when it contains a NUL byte, contains ".." anywhere,
 * is absolute (leading slash or backslash), or starts with a drive letter
 * followed by a colon.
 *
 * @param {string} name Entry name as stored in the archive.
 * @returns {boolean} `true` when the name must not be trusted.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) return true;
  // Treat Windows separators like POSIX ones before inspecting the path.
  const unified = name.replace(/\\/g, "/");
  if (unified.startsWith("/")) return true;
  if (/^[A-Za-z]:/.test(unified)) return true;
  return unified.includes("..");
}
|
|
3162
|
+
/**
 * Cheap ZIP-bomb pre-check that inspects only the archive's directory
 * metadata, without decompressing anything.
 *
 * It locates the end-of-central-directory (EOCD) record, reads the declared
 * entry count, then walks the central directory summing each entry's
 * declared uncompressed size.
 *
 * @param {ArrayBuffer} buffer Raw archive bytes.
 * @param {number} [maxUncompressedSize] Upper bound on the summed declared sizes (bytes).
 * @param {number} [maxEntries] Upper bound on the declared entry count.
 * @returns {{totalUncompressed: number, entryCount: number}} Declared totals;
 *   zeros when the structure cannot be read (no EOCD, out-of-range directory,
 *   or any non-Kordoc failure while parsing).
 * @throws {KordocError} When either limit is exceeded.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 0x06054b50;
  const CENTRAL_HEADER_SIGNATURE = 0x02014b50;
  try {
    const view = new DataView(buffer);
    const size = buffer.byteLength;
    // The EOCD is 22 bytes plus a trailing comment of at most 65535 bytes,
    // so it can start no earlier than size - 65557. Scan backwards from the
    // latest possible position.
    const earliest = Math.max(0, size - 65557);
    let eocd = size - 22;
    while (eocd >= earliest && view.getUint32(eocd, true) !== EOCD_SIGNATURE) {
      eocd--;
    }
    if (eocd < earliest) return { totalUncompressed: 0, entryCount: 0 };

    const entryCount = view.getUint16(eocd + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }

    const cdSize = view.getUint32(eocd + 12, true);
    const cdStart = view.getUint32(eocd + 16, true);
    const cdEnd = cdStart + cdSize;
    if (cdEnd > size) return { totalUncompressed: 0, entryCount };

    let totalUncompressed = 0;
    let cursor = cdStart;
    let visited = 0;
    // Each central header is 46 fixed bytes followed by name, extra and comment.
    while (visited < entryCount && cursor + 46 <= cdEnd) {
      if (view.getUint32(cursor, true) !== CENTRAL_HEADER_SIGNATURE) break;
      totalUncompressed += view.getUint32(cursor + 24, true);
      const variableLength =
        view.getUint16(cursor + 28, true) +
        view.getUint16(cursor + 30, true) +
        view.getUint16(cursor + 32, true);
      cursor += 46 + variableLength;
      visited++;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations propagate; anything else means "could not pre-check".
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
|
|
3200
|
+
// Link targets that may be emitted: http(s), mailto, tel, or an in-document
// fragment. Matching is case-insensitive and anchored at the start.
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;

/**
 * Trim a link target and accept it only if it starts with an allowed scheme.
 *
 * @param {string} href Raw link target.
 * @returns {string|null} The trimmed target, or `null` when it is empty or
 *   does not start with an allowed scheme.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  const allowed = candidate !== "" && SAFE_HREF_RE.test(candidate);
  return allowed ? candidate : null;
}
|
|
3206
|
+
/**
 * Map an error to a kordoc error code by looking for known substrings in
 * its message. Rules are evaluated in order and the first match wins, so the
 * more specific ZIP rule sits above the generic decompression rule.
 *
 * @param {unknown} err Anything that was thrown.
 * @returns {string} One of the code strings below; "PARSE_ERROR" when `err`
 *   is not an Error or no rule matches.
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const has = (needle) => text.includes(needle);
  const rules = [
    ["ENCRYPTED", () => has("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => has("DRM")],
    ["ZIP_BOMB", () => has("ZIP bomb") || has("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || has("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => has("bomb") || has("\uD06C\uAE30 \uCD08\uACFC") || has("\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => has("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => has("\uC139\uC158") && (has("\uCC3E\uC744 \uC218 \uC5C6") || has("\uC5C6\uC74C"))],
    ["CORRUPTED", () => has("\uC2DC\uADF8\uB2C8\uCC98") || has("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")]
  ];
  const matched = rules.find(([, applies]) => applies());
  return matched ? matched[0] : "PARSE_ERROR";
}
|
|
3218
|
+
/**
 * Coerce any thrown value into a KordocError carrying a code and a stage.
 *
 * An existing KordocError is returned as-is (same object), with `stage` and
 * `code` filled in only where they are still falsy. Other Error instances
 * keep their message and get a code from `classifyError`. Non-Error values
 * use the fallback message and fallback code.
 *
 * @param {unknown} err The thrown value.
 * @param {string} fallbackMessage Message used when `err` provides none.
 * @param {string} [stage] Pipeline stage to record.
 * @param {string} [fallbackCode] Code used when none can be derived.
 * @returns {KordocError}
 */
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
  if (err instanceof KordocError) {
    err.stage ||= stage;
    err.code ||= fallbackCode;
    return err;
  }
  if (!(err instanceof Error)) {
    return new KordocError(fallbackMessage, { code: fallbackCode, stage });
  }
  return new KordocError(err.message || fallbackMessage, { code: classifyError(err), stage });
}
|
|
3228
|
+
|
|
3251
3229
|
// src/table/builder.ts
|
|
3252
|
-
init_utils();
|
|
3253
3230
|
var MAX_COLS = 200;
|
|
3254
3231
|
var MAX_ROWS = 1e4;
|
|
3255
3232
|
function buildTable(rows) {
|
|
@@ -3509,8 +3486,6 @@ var HEADING_RATIO_H2 = 1.3;
|
|
|
3509
3486
|
var HEADING_RATIO_H3 = 1.15;
|
|
3510
3487
|
|
|
3511
3488
|
// src/hwpx/parser.ts
|
|
3512
|
-
init_utils();
|
|
3513
|
-
init_utils();
|
|
3514
3489
|
init_page_range();
|
|
3515
3490
|
init_logger();
|
|
3516
3491
|
var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
|
|
@@ -4353,7 +4328,6 @@ function extractTextFromNode(node) {
|
|
|
4353
4328
|
|
|
4354
4329
|
// src/hwp5/record.ts
|
|
4355
4330
|
var import_zlib = require("zlib");
|
|
4356
|
-
init_utils();
|
|
4357
4331
|
var TAG_PARA_HEADER = 66;
|
|
4358
4332
|
var TAG_PARA_TEXT = 67;
|
|
4359
4333
|
var TAG_CHAR_SHAPE = 68;
|
|
@@ -5403,7 +5377,6 @@ function parseLenientCfb(data) {
|
|
|
5403
5377
|
}
|
|
5404
5378
|
|
|
5405
5379
|
// src/hwp5/parser.ts
|
|
5406
|
-
init_utils();
|
|
5407
5380
|
init_page_range();
|
|
5408
5381
|
init_logger();
|
|
5409
5382
|
var CFB = __toESM(require_cfb(), 1);
|
|
@@ -6059,7 +6032,6 @@ function arrangeCells(rows, cols, cells) {
|
|
|
6059
6032
|
}
|
|
6060
6033
|
|
|
6061
6034
|
// src/pdf/parser.ts
|
|
6062
|
-
init_utils();
|
|
6063
6035
|
init_page_range();
|
|
6064
6036
|
var import_module = require("module");
|
|
6065
6037
|
var import_path4 = require("path");
|
|
@@ -7953,7 +7925,6 @@ function mergeKoreanLines(text) {
|
|
|
7953
7925
|
// src/xlsx/parser.ts
|
|
7954
7926
|
var import_jszip3 = __toESM(require("jszip"), 1);
|
|
7955
7927
|
var import_xmldom2 = require("@xmldom/xmldom");
|
|
7956
|
-
init_utils();
|
|
7957
7928
|
init_logger();
|
|
7958
7929
|
var MAX_SHEETS = 100;
|
|
7959
7930
|
var MAX_DECOMPRESS_SIZE3 = 500 * 1024 * 1024;
|
|
@@ -8282,7 +8253,6 @@ async function parseXlsxDocument(buffer, options, existingZip) {
|
|
|
8282
8253
|
// src/docx/parser.ts
|
|
8283
8254
|
var import_jszip4 = __toESM(require("jszip"), 1);
|
|
8284
8255
|
var import_xmldom3 = require("@xmldom/xmldom");
|
|
8285
|
-
init_utils();
|
|
8286
8256
|
init_logger();
|
|
8287
8257
|
var MAX_DECOMPRESS_SIZE4 = 500 * 1024 * 1024;
|
|
8288
8258
|
function getChildElements(parent, localName) {
|
|
@@ -8762,7 +8732,6 @@ async function parseDocxDocument(buffer, options, existingZip) {
|
|
|
8762
8732
|
}
|
|
8763
8733
|
|
|
8764
8734
|
// src/index.ts
|
|
8765
|
-
init_utils();
|
|
8766
8735
|
init_cli_provider();
|
|
8767
8736
|
init_markdown_to_blocks();
|
|
8768
8737
|
init_logger();
|
|
@@ -11265,8 +11234,7 @@ async function markdownToXlsx(markdown, options) {
|
|
|
11265
11234
|
}
|
|
11266
11235
|
|
|
11267
11236
|
// src/convert/index.ts
|
|
11268
|
-
var
|
|
11269
|
-
init_utils();
|
|
11237
|
+
var import_promises3 = require("fs/promises");
|
|
11270
11238
|
|
|
11271
11239
|
// src/convert/libreoffice.ts
|
|
11272
11240
|
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
@@ -11280,19 +11248,250 @@ var ConvertError = class extends Error {
|
|
|
11280
11248
|
}
|
|
11281
11249
|
};
|
|
11282
11250
|
|
|
11283
|
-
// src/convert/
|
|
11284
|
-
var
|
|
11285
|
-
|
|
11286
|
-
|
|
11251
|
+
// src/convert/installer.ts
|
|
11252
|
+
var import_os3 = require("os");
|
|
11253
|
+
var import_path5 = require("path");
|
|
11254
|
+
var import_promises2 = require("fs/promises");
|
|
11255
|
+
var import_fs4 = require("fs");
|
|
11256
|
+
var import_child_process4 = require("child_process");
|
|
11257
|
+
// Promise of the installation currently running, or null when none is.
// Concurrent callers of resolveSoffice share it instead of starting another.
var installInFlight = null;

// Per-user directory that holds a downloaded LibreOffice.
var CACHE_DIR = import_path5.join(import_os3.homedir(), ".cache", "kordoc", "libreoffice");

// Path of a version marker inside the cache directory.
var VERSION_FILE = import_path5.join(CACHE_DIR, "version");

// Download descriptors keyed by `process.platform`:
//   url     - installer/archive to fetch
//   binPath - location of the soffice binary relative to CACHE_DIR after install
//   sizeMb  - approximate download size, used only as the progress total
var PACKAGES = {
  darwin: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300
  },
  linux: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200
  },
  win32: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350
  }
};
|
|
11277
|
+
/**
 * Probe whether `soffice` can be launched through the system PATH by
 * running `soffice --version`.
 *
 * @returns {Promise<"soffice"|null>} The bare command name when the probe
 *   exits with code 0; `null` when it exits non-zero or cannot be spawned.
 */
async function findInPath() {
  return new Promise((settle) => {
    const probe = import_child_process4.spawn("soffice", ["--version"], { stdio: "ignore" });
    probe.on("close", (exitCode) => {
      settle(exitCode === 0 ? "soffice" : null);
    });
    // Spawn failures (e.g. command not found) arrive as an "error" event.
    probe.on("error", () => {
      settle(null);
    });
  });
}
|
|
11284
|
+
/**
 * Look for a LibreOffice binary that a previous auto-install left in the
 * cache directory.
 *
 * Two locations are probed, in order:
 *   1. `<CACHE_DIR>/bin/soffice` - the link created by `createSymlink`.
 *   2. `<CACHE_DIR>/<binPath>` for the current platform - the real binary.
 *
 * The second probe matters because the Windows installer returns the real
 * binary path without creating the `bin/soffice` link, and because link
 * creation can fail; without it a cached install would be missed.
 *
 * @returns {Promise<string|null>} First accessible path, or `null`.
 */
async function findInCache() {
  const candidates = [import_path5.join(CACHE_DIR, "bin", "soffice")];
  const pkg = PACKAGES[process.platform];
  if (pkg) {
    candidates.push(import_path5.join(CACHE_DIR, pkg.binPath));
  }
  for (const candidate of candidates) {
    try {
      await import_promises2.access(candidate);
      return candidate;
    } catch {
      // Not present at this location; try the next candidate.
    }
  }
  return null;
}
|
|
11293
|
+
/**
 * Check the conventional per-platform install locations for `soffice`.
 *
 * @returns {Promise<string|null>} The first candidate that `access` accepts,
 *   in the order listed below, or `null` when none exists (including on
 *   platforms with no candidate list).
 */
async function findInDefaultPaths() {
  let candidates;
  switch (process.platform) {
    case "darwin":
      candidates = [
        "/Applications/LibreOffice.app/Contents/MacOS/soffice",
        "/opt/homebrew/bin/soffice",
        "/usr/local/bin/soffice"
      ];
      break;
    case "linux":
      candidates = [
        "/usr/bin/soffice",
        "/usr/lib/libreoffice/program/soffice"
      ];
      break;
    case "win32": {
      const programFiles = process.env["ProgramFiles"] ?? "C:\\Program Files";
      const programFilesX86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
      candidates = [programFiles, programFilesX86].map(
        (root) => import_path5.join(root, "LibreOffice", "program", "soffice.exe")
      );
      break;
    }
    default:
      candidates = [];
  }
  for (const candidate of candidates) {
    const reachable = await import_promises2.access(candidate).then(() => true, () => false);
    if (reachable) return candidate;
  }
  return null;
}
|
|
11325
|
+
/**
 * Stream `url` into the file `dest`, reporting progress after every chunk.
 *
 * The returned promise resolves only after the write stream has emitted
 * "finish", so the file is complete on disk when the caller proceeds.
 * Non-2xx responses are rejected instead of being saved as if they were the
 * payload, writes honour backpressure, and write-stream errors reject the
 * promise rather than surfacing as an unhandled "error" event.
 *
 * @param {string} url Resource to download.
 * @param {string} dest Destination file path.
 * @param {number} totalBytes Total passed through to `onProgress`.
 * @param {(downloaded: number, total: number) => void} [onProgress]
 *   Called after each chunk with the running byte count.
 * @returns {Promise<void>}
 * @throws {Error} On an HTTP error status, a missing body, or a read/write failure.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    // Release the connection; the error page must not be written to disk.
    await response.body?.cancel().catch(() => {});
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");

  const file = import_fs4.createWriteStream(dest);
  let writeError = null;
  // Permanent listener: keeps a stream failure from crashing the process
  // while no wait is pending, and remembers it for the next check.
  file.on("error", (err) => {
    writeError ??= err;
  });
  // Resolves when `event` fires on the stream; rejects on a stream error.
  const settled = (event) => new Promise((resolve4, reject) => {
    if (writeError) {
      reject(writeError);
      return;
    }
    const detach = () => {
      file.off(event, onEvent);
      file.off("error", onError);
    };
    const onEvent = () => {
      detach();
      resolve4();
    };
    const onError = (err) => {
      detach();
      reject(err);
    };
    file.once(event, onEvent);
    file.once("error", onError);
  });

  const reader = response.body.getReader();
  let downloaded = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (writeError) throw writeError;
      // write() returning false means the internal buffer is full.
      if (!file.write(value)) await settled("drain");
      downloaded += value.length;
      onProgress?.(downloaded, totalBytes);
    }
    const flushed = settled("finish");
    file.end();
    await flushed;
  } catch (err) {
    file.destroy();
    await reader.cancel().catch(() => {});
    throw err;
  } finally {
    reader.releaseLock();
  }
}
|
|
11344
|
+
/**
 * Download the LibreOffice package described by `pkg` and hand it to the
 * installer for the current platform.
 *
 * The platform is validated before anything is downloaded, and the download
 * itself runs inside the guarded section so that a failed or partial
 * download is removed just like a failed install.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg Package descriptor.
 * @param {(downloaded: number, total: number) => void} [onProgress] Download progress callback.
 * @returns {Promise<string>} Path of the installed soffice binary.
 * @throws {ConvertError} "UNSUPPORTED_PLATFORM" when no installer exists for this platform.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  let install;
  switch (platform) {
    case "darwin":
      install = installMacOS;
      break;
    case "linux":
      install = installLinux;
      break;
    case "win32":
      install = installWindows;
      break;
    default:
      throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  await import_promises2.mkdir(CACHE_DIR, { recursive: true });
  const downloadPath = import_path5.join(CACHE_DIR, `download-${Date.now()}`);
  try {
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await install(pkg, downloadPath);
  } catch (err) {
    // Best-effort cleanup; the original failure is what the caller needs.
    await import_promises2.rm(downloadPath, { force: true }).catch(() => {});
    throw err;
  }
}
|
|
11363
|
+
/**
 * Install LibreOffice on macOS from a downloaded .dmg: mount the image, copy
 * `LibreOffice.app` into the cache directory, unmount, remove the download
 * and link the binary.
 *
 * @param {{binPath: string}} pkg Package descriptor (binPath is relative to CACHE_DIR).
 * @param {string} downloadPath Path of the downloaded .dmg.
 * @returns {Promise<string>} Path returned by `createSymlink`.
 * @throws {Error} When mounting or copying fails, or a tool cannot be started.
 */
async function installMacOS(pkg, downloadPath) {
  // Run a command to completion. The "error" listener covers the case where
  // the executable cannot be started at all; without it the promise would
  // never settle and the unhandled event would terminate the process.
  const run = (command, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = import_child_process4.spawn(command, args, { stdio: "ignore" });
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });

  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await run("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath], "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328");
  try {
    const appSource = import_path5.join(mountPoint, "LibreOffice.app");
    const appDest = import_path5.join(CACHE_DIR, "LibreOffice.app");
    // `cp -R src dest` copies *into* dest when dest already exists, which
    // would nest the bundle on a reinstall; start from a clean destination.
    await import_promises2.rm(appDest, { recursive: true, force: true });
    await run("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Unmounting is best-effort and must not mask a copy failure.
    await run("hdiutil", ["detach", mountPoint], "detach").catch(() => {});
  }
  await import_promises2.rm(downloadPath, { force: true });
  return await createSymlink(import_path5.join(CACHE_DIR, pkg.binPath));
}
|
|
11385
|
+
/**
 * Install LibreOffice on Linux from the downloaded `.tar.gz` of .deb
 * packages: unpack the archive, extract every .deb into the cache directory
 * with `dpkg-deb -x`, clean up and link the binary.
 *
 * Extraction problems are reported instead of being swallowed, so a failed
 * install no longer returns a path to a binary that was never extracted.
 *
 * @param {{binPath: string}} pkg Package descriptor (binPath is relative to CACHE_DIR).
 * @param {string} downloadPath Path of the downloaded archive.
 * @returns {Promise<string>} Path returned by `createSymlink`.
 * @throws {Error} When unpacking fails, no .deb packages are found, a
 *   package cannot be extracted, or a tool cannot be started.
 */
async function installLinux(pkg, downloadPath) {
  // The "error" listener covers executables that cannot be started (e.g.
  // dpkg-deb missing on a non-Debian system).
  const run = (command, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = import_child_process4.spawn(command, args, { stdio: "ignore" });
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });

  const extractDir = import_path5.join(CACHE_DIR, `extract-${Date.now()}`);
  await import_promises2.mkdir(extractDir, { recursive: true });
  try {
    await run("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");

    // NOTE(review): upstream archives appear to wrap DEBS in a versioned
    // top-level directory - confirm. Both layouts are accepted: DEBS at the
    // archive root, or one level below it.
    const topLevel = await import_promises2.readdir(extractDir);
    const candidates = [
      import_path5.join(extractDir, "DEBS"),
      ...topLevel.map((entry) => import_path5.join(extractDir, entry, "DEBS"))
    ];
    let debsDir = null;
    for (const candidate of candidates) {
      try {
        await import_promises2.access(candidate);
        debsDir = candidate;
        break;
      } catch {
        // Not at this location; keep looking.
      }
    }
    if (!debsDir) throw new Error("DEBS 디렉터리를 찾을 수 없습니다");

    const packages = (await import_promises2.readdir(debsDir)).filter((entry) => entry.endsWith(".deb"));
    if (packages.length === 0) throw new Error(".deb 패키지를 찾을 수 없습니다");
    for (const entry of packages) {
      await run("dpkg-deb", ["-x", import_path5.join(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
    }
  } finally {
    // The scratch directory is removed on success and on failure alike.
    await import_promises2.rm(extractDir, { recursive: true, force: true });
  }
  await import_promises2.rm(downloadPath, { force: true });
  return await createSymlink(import_path5.join(CACHE_DIR, pkg.binPath));
}
|
|
11410
|
+
/**
 * Install LibreOffice on Windows by running `msiexec /a` on the downloaded
 * MSI with the cache directory as TARGETDIR, then removing the download.
 *
 * @param {{binPath: string}} pkg Package descriptor (binPath is relative to CACHE_DIR).
 * @param {string} downloadPath Path of the downloaded MSI.
 * @returns {Promise<string>} Path of `soffice.exe` inside the cache directory.
 * @throws {Error} When msiexec exits non-zero or cannot be started.
 */
async function installWindows(pkg, downloadPath) {
  await new Promise((resolve4, reject) => {
    const child = import_child_process4.spawn(
      "msiexec",
      ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`],
      { stdio: "ignore" }
    );
    // Without this listener a spawn failure would leave the promise pending
    // and raise an unhandled "error" event.
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
  });
  await import_promises2.rm(downloadPath, { force: true });
  return import_path5.join(CACHE_DIR, pkg.binPath);
}
|
|
11418
|
+
/**
 * Expose the installed binary as `<CACHE_DIR>/bin/soffice` and make sure
 * that directory is on `PATH` for this process.
 *
 * Any previous link is replaced so it cannot keep pointing at an older
 * install. If the link cannot be created, the real binary path is returned
 * instead of a link path that does not exist. `PATH` is prepended at most
 * once and an unset `PATH` is handled.
 *
 * @param {string} actualBin Path of the real soffice binary.
 * @returns {Promise<string>} The link path, or `actualBin` when linking failed.
 */
async function createSymlink(actualBin) {
  const binDir = import_path5.join(CACHE_DIR, "bin");
  await import_promises2.mkdir(binDir, { recursive: true });
  const linkBin = import_path5.join(binDir, "soffice");

  let usable = linkBin;
  try {
    await import_promises2.rm(linkBin, { force: true });
    await import_promises2.symlink(actualBin, linkBin);
  } catch {
    // Linking is a convenience only; fall back to the real binary.
    usable = actualBin;
  }

  const currentPath = process.env.PATH ?? "";
  const alreadyListed = currentPath.split(import_path5.delimiter).includes(binDir);
  if (!alreadyListed) {
    process.env.PATH = currentPath ? `${binDir}${import_path5.delimiter}${currentPath}` : binDir;
  }
  return usable;
}
|
|
11429
|
+
/**
 * Auto-install LibreOffice for the current platform.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress] Download progress callback.
 * @returns {Promise<string>} Path of the installed soffice binary.
 * @throws {ConvertError} "UNSUPPORTED_PLATFORM" when PACKAGES has no entry
 *   for `process.platform`.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const pkg = PACKAGES[platform];
  if (pkg) {
    return await installForPlatform(pkg, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
|
|
11440
|
+
/**
 * Find a usable `soffice` binary, installing LibreOffice when allowed.
 *
 * Lookup order: system PATH, kordoc's cache directory, conventional install
 * locations. When nothing is found and `autoInstall` is true, an install is
 * started; callers arriving while one is running share its promise.
 *
 * The download total is an estimate from the package table, so the reported
 * percentage is clamped to 0-100 and a zero total cannot produce
 * NaN/Infinity.
 *
 * @param {object} emitter Event sink providing `validate`, `install` and `error`.
 * @param {boolean} [autoInstall=true] Whether to install when nothing is found.
 * @returns {Promise<string>} Command name or path of the soffice binary.
 * @throws {ConvertError} "SOFFICE_NOT_FOUND" when nothing is found and
 *   `autoInstall` is false; install failures are rethrown unchanged.
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");

  // Probed strictly in order; later probes run only when earlier ones miss.
  const probes = [
    [findInPath, "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC"],
    [findInCache, "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC"],
    [findInDefaultPaths, "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC"]
  ];
  for (const [probe, foundMessage] of probes) {
    const sofficePath = await probe();
    if (sofficePath) {
      emitter.validate("soffice_found", foundMessage, { sofficePath });
      return sofficePath;
    }
  }

  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }

  if (installInFlight) {
    return installInFlight;
  }

  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  installInFlight = (async () => {
    try {
      const installed = await installLibreOffice((downloaded, total) => {
        const ratio = total > 0 ? downloaded / total : 0;
        const percent = Math.min(100, Math.max(0, Math.round(ratio * 100)));
        emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
          percent,
          downloadedBytes: downloaded,
          totalBytes: total
        });
      });
      emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
      return installed;
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);
      emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
      throw err;
    } finally {
      // Allow a later call to retry after success or failure.
      installInFlight = null;
    }
  })();
  return installInFlight;
}
|
|
11492
|
+
|
|
11493
|
+
// src/convert/libreoffice.ts
|
|
11494
|
+
// Callback-style `convert` function taken from the libreoffice-convert default export.
var { convert: libreConvert } = import_libreoffice_convert.default;
|
|
11296
11495
|
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
11297
11496
|
return new Promise((resolve4, reject) => {
|
|
11298
11497
|
const timer = setTimeout(() => {
|
|
@@ -11316,6 +11515,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
|
11316
11515
|
});
|
|
11317
11516
|
}
|
|
11318
11517
|
|
|
11518
|
+
// src/convert/events.ts
|
|
11519
|
+
/**
 * Minimal single-listener event channel for the conversion pipeline.
 *
 * Every helper builds a plain event object and passes it to `emit`, which
 * forwards it to the registered listener (if any). Exceptions thrown by the
 * listener are swallowed so they cannot interrupt the conversion.
 */
var ConvertEventEmitter = class {
  listener = null;

  /** Register the event listener, replacing any previous one. */
  setListener(listener) {
    this.listener = listener;
  }

  /** Deliver an event to the listener; listener exceptions are ignored. */
  emit(event) {
    try {
      this.listener?.(event);
    } catch {
    }
  }

  /** Helper: emit a "detect" event; `meta` fields are merged into it. */
  detect(stage, message, meta) {
    this.emit(Object.assign({ type: "detect", stage, message }, meta));
  }

  /** Helper: emit a "validate" event; `meta` fields are merged into it. */
  validate(stage, message, meta) {
    this.emit(Object.assign({ type: "validate", stage, message }, meta));
  }

  /** Helper: emit an "install" event; `meta` fields are merged into it. */
  install(stage, message, meta) {
    this.emit(Object.assign({ type: "install", stage, message }, meta));
  }

  /** Helper: emit a conversion progress event with the given percent. */
  progress(percent, message) {
    const event = { type: "convert", stage: "convert_progress", message, percent };
    this.emit(event);
  }

  /** Helper: emit the conversion-start event (percent 0). */
  convertStart(message) {
    const event = { type: "convert", stage: "convert_start", message, percent: 0 };
    this.emit(event);
  }

  /** Helper: emit the conversion-done event (percent 100). */
  convertDone(message) {
    const event = { type: "convert", stage: "convert_done", message, percent: 100 };
    this.emit(event);
  }

  /** Helper: emit the final "complete" event carrying the result. */
  complete(result) {
    const event = { type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result };
    this.emit(event);
  }

  /** Helper: emit an "error" event; it is always flagged as recoverable. */
  error(stage, code, message, suggestion) {
    const event = { type: "error", stage, code, message, recoverable: true, suggestion };
    this.emit(event);
  }
};
|
|
11565
|
+
|
|
11319
11566
|
// src/convert/index.ts
|
|
11320
11567
|
var isConverting = false;
|
|
11321
11568
|
var queue = [];
|
|
@@ -11340,81 +11587,129 @@ async function acquireConvertLock() {
|
|
|
11340
11587
|
});
|
|
11341
11588
|
}
|
|
11342
11589
|
async function convertToPdf(input, options) {
|
|
11343
|
-
|
|
11344
|
-
|
|
11345
|
-
|
|
11346
|
-
buffer = await (0, import_promises2.readFile)(input);
|
|
11347
|
-
} else if (Buffer.isBuffer(input)) {
|
|
11348
|
-
buffer = input;
|
|
11349
|
-
} else {
|
|
11350
|
-
buffer = Buffer.from(input);
|
|
11351
|
-
}
|
|
11352
|
-
} catch (err) {
|
|
11353
|
-
return {
|
|
11354
|
-
success: false,
|
|
11355
|
-
code: "PARSE_ERROR",
|
|
11356
|
-
error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
|
|
11357
|
-
stage: "detect"
|
|
11358
|
-
};
|
|
11359
|
-
}
|
|
11360
|
-
const MAX_FILE_SIZE = 500 * 1024 * 1024;
|
|
11361
|
-
if (buffer.length > MAX_FILE_SIZE) {
|
|
11362
|
-
return {
|
|
11363
|
-
success: false,
|
|
11364
|
-
code: "FILE_TOO_LARGE",
|
|
11365
|
-
error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
|
|
11366
|
-
stage: "detect"
|
|
11367
|
-
};
|
|
11590
|
+
const emitter = new ConvertEventEmitter();
|
|
11591
|
+
if (options?.onEvent) {
|
|
11592
|
+
emitter.setListener(options.onEvent);
|
|
11368
11593
|
}
|
|
11369
|
-
|
|
11370
|
-
|
|
11371
|
-
|
|
11372
|
-
|
|
11373
|
-
|
|
11374
|
-
|
|
11375
|
-
|
|
11376
|
-
};
|
|
11594
|
+
if (options?.onProgress) {
|
|
11595
|
+
const legacyProgress = options.onProgress;
|
|
11596
|
+
emitter.setListener((event) => {
|
|
11597
|
+
if (event.type === "convert" && event.stage === "convert_progress") {
|
|
11598
|
+
legacyProgress(event.percent, event.message);
|
|
11599
|
+
}
|
|
11600
|
+
});
|
|
11377
11601
|
}
|
|
11378
11602
|
try {
|
|
11379
|
-
|
|
11380
|
-
|
|
11381
|
-
|
|
11603
|
+
emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
|
|
11604
|
+
let buffer;
|
|
11605
|
+
try {
|
|
11606
|
+
if (typeof input === "string") {
|
|
11607
|
+
buffer = await (0, import_promises3.readFile)(input);
|
|
11608
|
+
} else if (Buffer.isBuffer(input)) {
|
|
11609
|
+
buffer = input;
|
|
11610
|
+
} else {
|
|
11611
|
+
buffer = Buffer.from(input);
|
|
11612
|
+
}
|
|
11613
|
+
} catch (err) {
|
|
11614
|
+
emitter.error(
|
|
11615
|
+
"detect",
|
|
11616
|
+
"PARSE_ERROR",
|
|
11617
|
+
`\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
|
|
11618
|
+
);
|
|
11382
11619
|
return {
|
|
11383
11620
|
success: false,
|
|
11384
|
-
code:
|
|
11385
|
-
error: err.message
|
|
11386
|
-
stage: "
|
|
11621
|
+
code: "PARSE_ERROR",
|
|
11622
|
+
error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
|
|
11623
|
+
stage: "detect"
|
|
11387
11624
|
};
|
|
11388
11625
|
}
|
|
11389
|
-
|
|
11390
|
-
|
|
11391
|
-
|
|
11392
|
-
|
|
11393
|
-
|
|
11394
|
-
|
|
11395
|
-
|
|
11396
|
-
|
|
11397
|
-
|
|
11398
|
-
|
|
11399
|
-
|
|
11400
|
-
|
|
11401
|
-
|
|
11402
|
-
|
|
11626
|
+
const MAX_FILE_SIZE = 500 * 1024 * 1024;
|
|
11627
|
+
if (buffer.length > MAX_FILE_SIZE) {
|
|
11628
|
+
emitter.error(
|
|
11629
|
+
"detect",
|
|
11630
|
+
"FILE_TOO_LARGE",
|
|
11631
|
+
`\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
|
|
11632
|
+
);
|
|
11633
|
+
return {
|
|
11634
|
+
success: false,
|
|
11635
|
+
code: "FILE_TOO_LARGE",
|
|
11636
|
+
error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
|
|
11637
|
+
stage: "detect"
|
|
11638
|
+
};
|
|
11639
|
+
}
|
|
11640
|
+
const format = detectFormat(toArrayBuffer(buffer));
|
|
11641
|
+
emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
|
|
11642
|
+
if (format !== "hwp" && format !== "hwpx") {
|
|
11643
|
+
emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
|
|
11644
|
+
return {
|
|
11645
|
+
success: false,
|
|
11646
|
+
code: "UNSUPPORTED_FORMAT",
|
|
11647
|
+
error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
|
|
11648
|
+
stage: "detect"
|
|
11649
|
+
};
|
|
11650
|
+
}
|
|
11651
|
+
emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
|
|
11652
|
+
let sofficePath;
|
|
11653
|
+
try {
|
|
11654
|
+
sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
|
|
11655
|
+
} catch (err) {
|
|
11656
|
+
if (err instanceof ConvertError) {
|
|
11657
|
+
return {
|
|
11658
|
+
success: false,
|
|
11659
|
+
code: err.code,
|
|
11660
|
+
error: err.message,
|
|
11661
|
+
stage: "validate"
|
|
11662
|
+
};
|
|
11663
|
+
}
|
|
11664
|
+
throw err;
|
|
11665
|
+
}
|
|
11666
|
+
const releaseLock = await acquireConvertLock();
|
|
11667
|
+
try {
|
|
11668
|
+
emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
|
|
11669
|
+
emitter.progress(10, "\uBCC0\uD658 \uC911...");
|
|
11670
|
+
const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
|
|
11671
|
+
emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
|
|
11672
|
+
emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
|
|
11673
|
+
const result = {
|
|
11674
|
+
success: true,
|
|
11675
|
+
pdf: new Uint8Array(pdf),
|
|
11676
|
+
sourceFormat: format
|
|
11677
|
+
};
|
|
11678
|
+
emitter.complete({
|
|
11679
|
+
sourceFormat: format,
|
|
11680
|
+
pdfSize: pdf.length
|
|
11681
|
+
});
|
|
11682
|
+
return result;
|
|
11683
|
+
} catch (err) {
|
|
11684
|
+
if (err instanceof ConvertError) {
|
|
11685
|
+
emitter.error("convert", err.code, err.message);
|
|
11686
|
+
return {
|
|
11687
|
+
success: false,
|
|
11688
|
+
code: err.code,
|
|
11689
|
+
error: err.message,
|
|
11690
|
+
stage: "convert"
|
|
11691
|
+
};
|
|
11692
|
+
}
|
|
11693
|
+
const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
|
|
11694
|
+
emitter.error("convert", classifyError(err), errorMsg);
|
|
11403
11695
|
return {
|
|
11404
11696
|
success: false,
|
|
11405
|
-
code: err
|
|
11406
|
-
error:
|
|
11697
|
+
code: classifyError(err),
|
|
11698
|
+
error: errorMsg,
|
|
11407
11699
|
stage: "convert"
|
|
11408
11700
|
};
|
|
11701
|
+
} finally {
|
|
11702
|
+
releaseLock();
|
|
11409
11703
|
}
|
|
11704
|
+
} catch (unexpectedErr) {
|
|
11705
|
+
const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
|
|
11706
|
+
emitter.error("convert", "PARSE_ERROR", errorMsg);
|
|
11410
11707
|
return {
|
|
11411
11708
|
success: false,
|
|
11412
|
-
code:
|
|
11413
|
-
error:
|
|
11709
|
+
code: "PARSE_ERROR",
|
|
11710
|
+
error: errorMsg,
|
|
11414
11711
|
stage: "convert"
|
|
11415
11712
|
};
|
|
11416
|
-
} finally {
|
|
11417
|
-
releaseLock();
|
|
11418
11713
|
}
|
|
11419
11714
|
}
|
|
11420
11715
|
async function convertHwpToPdf(input, options) {
|
|
@@ -11442,9 +11737,6 @@ async function convertHwpxToPdf(input, options) {
|
|
|
11442
11737
|
return result;
|
|
11443
11738
|
}
|
|
11444
11739
|
|
|
11445
|
-
// src/index.ts
|
|
11446
|
-
init_utils();
|
|
11447
|
-
|
|
11448
11740
|
// src/ocr/api-key-rotation.ts
|
|
11449
11741
|
var AllKeysCoolingDownError = class extends Error {
|
|
11450
11742
|
waitMs;
|
|
@@ -11539,9 +11831,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11539
11831
|
};
|
|
11540
11832
|
|
|
11541
11833
|
// src/pipeline/unified-ocr.ts
|
|
11542
|
-
var
|
|
11543
|
-
var
|
|
11544
|
-
var
|
|
11834
|
+
var import_promises4 = require("fs/promises");
|
|
11835
|
+
var import_path6 = require("path");
|
|
11836
|
+
var import_child_process5 = require("child_process");
|
|
11545
11837
|
var import_node_perf_hooks = require("perf_hooks");
|
|
11546
11838
|
init_logger();
|
|
11547
11839
|
|
|
@@ -11675,15 +11967,15 @@ function elapsedMs(startAt) {
|
|
|
11675
11967
|
return Math.round(import_node_perf_hooks.performance.now() - startAt);
|
|
11676
11968
|
}
|
|
11677
11969
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11678
|
-
const absInput = (0,
|
|
11679
|
-
const stem = (0,
|
|
11680
|
-
const workspaceDir = (0,
|
|
11681
|
-
const imagesDir = (0,
|
|
11682
|
-
const rawDir = (0,
|
|
11683
|
-
const diffDir = (0,
|
|
11684
|
-
const outputPath = (0,
|
|
11685
|
-
const reportPath = (0,
|
|
11686
|
-
const modelCachePath = (0,
|
|
11970
|
+
const absInput = (0, import_path6.resolve)(inputPath);
|
|
11971
|
+
const stem = (0, import_path6.basename)(absInput, (0, import_path6.extname)(absInput));
|
|
11972
|
+
const workspaceDir = (0, import_path6.resolve)(options.workspaceDir ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}_ocr_workspace`));
|
|
11973
|
+
const imagesDir = (0, import_path6.join)(workspaceDir, "images");
|
|
11974
|
+
const rawDir = (0, import_path6.join)(workspaceDir, "ocr", "raw");
|
|
11975
|
+
const diffDir = (0, import_path6.join)(workspaceDir, "ocr", "diff");
|
|
11976
|
+
const outputPath = (0, import_path6.resolve)(options.outputPath ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}.md`));
|
|
11977
|
+
const reportPath = (0, import_path6.join)(workspaceDir, "run-report.json");
|
|
11978
|
+
const modelCachePath = (0, import_path6.join)((0, import_path6.dirname)(absInput), ".kordoc-model-cache.json");
|
|
11687
11979
|
const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
|
|
11688
11980
|
const timeoutMs = options.timeoutMs ?? 6e4;
|
|
11689
11981
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
@@ -11697,9 +11989,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11697
11989
|
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
11698
11990
|
const runId = options.runId ?? generateRunId("ocr");
|
|
11699
11991
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
11700
|
-
await (0,
|
|
11701
|
-
await (0,
|
|
11702
|
-
await (0,
|
|
11992
|
+
await (0, import_promises4.mkdir)(imagesDir, { recursive: true });
|
|
11993
|
+
await (0, import_promises4.mkdir)(rawDir, { recursive: true });
|
|
11994
|
+
await (0, import_promises4.mkdir)(diffDir, { recursive: true });
|
|
11703
11995
|
const timingsMs = {};
|
|
11704
11996
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
11705
11997
|
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
@@ -11715,12 +12007,30 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11715
12007
|
currentStage = "convert";
|
|
11716
12008
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11717
12009
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
11718
|
-
if ((0,
|
|
11719
|
-
|
|
11720
|
-
|
|
11721
|
-
|
|
12010
|
+
if ((0, import_path6.extname)(absInput).toLowerCase() !== ".pdf") {
|
|
12011
|
+
const convertEmitter = new ConvertEventEmitter();
|
|
12012
|
+
if (options.onEvent) {
|
|
12013
|
+
convertEmitter.setListener((evt) => {
|
|
12014
|
+
if (evt.type === "install" || evt.type === "validate" || evt.type === "error") {
|
|
12015
|
+
try {
|
|
12016
|
+
;
|
|
12017
|
+
options.onEvent(evt);
|
|
12018
|
+
} catch {
|
|
12019
|
+
}
|
|
12020
|
+
}
|
|
12021
|
+
});
|
|
12022
|
+
}
|
|
12023
|
+
if (options.sofficePath) {
|
|
12024
|
+
const sofficeDir = (0, import_path6.dirname)(options.sofficePath);
|
|
12025
|
+
process.env.PATH = `${sofficeDir}${import_path6.delimiter}${process.env.PATH ?? ""}`;
|
|
12026
|
+
convertEmitter.validate("soffice_found", "\uC9C1\uC811 \uC9C0\uC815\uB41C LibreOffice \uACBD\uB85C \uC0AC\uC6A9", { sofficePath: options.sofficePath });
|
|
12027
|
+
} else {
|
|
12028
|
+
await resolveSoffice(convertEmitter, options.autoInstallLibreOffice ?? false);
|
|
12029
|
+
}
|
|
12030
|
+
workingPdfPath = (0, import_path6.join)(workspaceDir, `${stem}.pdf`);
|
|
12031
|
+
const inputBuffer = await (0, import_promises4.readFile)(absInput);
|
|
11722
12032
|
const out = await convertBuffer(inputBuffer, ".pdf");
|
|
11723
|
-
await (0,
|
|
12033
|
+
await (0, import_promises4.writeFile)(workingPdfPath, out);
|
|
11724
12034
|
}
|
|
11725
12035
|
timingsMs.convert = elapsedMs(convertStart);
|
|
11726
12036
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
@@ -11731,10 +12041,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11731
12041
|
if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11732
12042
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11733
12043
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
|
|
11734
|
-
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0,
|
|
11735
|
-
const firstFiles = (await (0,
|
|
12044
|
+
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0, import_path6.join)(imagesDir, "page")]);
|
|
12045
|
+
const firstFiles = (await (0, import_promises4.readdir)(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11736
12046
|
if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
|
|
11737
|
-
const probeImage = (0,
|
|
12047
|
+
const probeImage = (0, import_path6.join)(imagesDir, firstFiles[0]);
|
|
11738
12048
|
markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
|
|
11739
12049
|
const probeStart = import_node_perf_hooks.performance.now();
|
|
11740
12050
|
currentStage = "probe";
|
|
@@ -11780,7 +12090,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11780
12090
|
try {
|
|
11781
12091
|
await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
|
|
11782
12092
|
if (totalPages > 1) {
|
|
11783
|
-
for await (const item of renderPdfToPngStream(workingPdfPath, (0,
|
|
12093
|
+
for await (const item of renderPdfToPngStream(workingPdfPath, (0, import_path6.join)(imagesDir, "page"), dpi, totalPages, 2)) {
|
|
11784
12094
|
await queue2.enqueue(item);
|
|
11785
12095
|
renderDone++;
|
|
11786
12096
|
markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
|
|
@@ -11830,8 +12140,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11830
12140
|
const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
|
|
11831
12141
|
const rawPagePaths = [];
|
|
11832
12142
|
for (const [pageNum, markdown] of sortedEntries) {
|
|
11833
|
-
const pagePath = (0,
|
|
11834
|
-
await (0,
|
|
12143
|
+
const pagePath = (0, import_path6.join)(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
|
|
12144
|
+
await (0, import_promises4.writeFile)(pagePath, markdown, "utf-8");
|
|
11835
12145
|
rawPagePaths.push(pagePath);
|
|
11836
12146
|
}
|
|
11837
12147
|
const mergeStart = import_node_perf_hooks.performance.now();
|
|
@@ -11839,7 +12149,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11839
12149
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11840
12150
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11841
12151
|
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
11842
|
-
await (0,
|
|
12152
|
+
await (0, import_promises4.writeFile)(outputPath, merged, "utf-8");
|
|
11843
12153
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
11844
12154
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11845
12155
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
@@ -11855,7 +12165,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11855
12165
|
timingsMs,
|
|
11856
12166
|
modelCachePath
|
|
11857
12167
|
};
|
|
11858
|
-
await (0,
|
|
12168
|
+
await (0, import_promises4.writeFile)(reportPath, JSON.stringify(report, null, 2), "utf-8");
|
|
11859
12169
|
logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
11860
12170
|
return { outputPath, reportPath, selectedModel };
|
|
11861
12171
|
} catch (err) {
|
|
@@ -11930,7 +12240,7 @@ async function getPdfPageCount(pdfPath) {
|
|
|
11930
12240
|
return n;
|
|
11931
12241
|
}
|
|
11932
12242
|
async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, startPage = 1) {
|
|
11933
|
-
const imagesDir = (0,
|
|
12243
|
+
const imagesDir = (0, import_path6.dirname)(prefixPath);
|
|
11934
12244
|
for (let page = startPage; page <= totalPages; page++) {
|
|
11935
12245
|
try {
|
|
11936
12246
|
await runCommand("pdftoppm", [
|
|
@@ -11944,9 +12254,9 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11944
12254
|
pdfPath,
|
|
11945
12255
|
prefixPath
|
|
11946
12256
|
]);
|
|
11947
|
-
const files = await (0,
|
|
12257
|
+
const files = await (0, import_promises4.readdir)(imagesDir);
|
|
11948
12258
|
const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11949
|
-
const imagePath = (0,
|
|
12259
|
+
const imagePath = (0, import_path6.join)(imagesDir, pageFiles[pageFiles.length - 1]);
|
|
11950
12260
|
yield { pageNumber: page, imagePath };
|
|
11951
12261
|
} catch (err) {
|
|
11952
12262
|
yield {
|
|
@@ -11959,7 +12269,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11959
12269
|
}
|
|
11960
12270
|
async function runCommand(cmd, args) {
|
|
11961
12271
|
await new Promise((resolvePromise, reject) => {
|
|
11962
|
-
const child = (0,
|
|
12272
|
+
const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
|
|
11963
12273
|
let stderr = "";
|
|
11964
12274
|
child.stderr.on("data", (d) => {
|
|
11965
12275
|
stderr += String(d);
|
|
@@ -11973,7 +12283,7 @@ async function runCommand(cmd, args) {
|
|
|
11973
12283
|
}
|
|
11974
12284
|
async function runCommandWithStdout(cmd, args) {
|
|
11975
12285
|
return await new Promise((resolvePromise, reject) => {
|
|
11976
|
-
const child = (0,
|
|
12286
|
+
const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
|
|
11977
12287
|
let stdout = "";
|
|
11978
12288
|
let stderr = "";
|
|
11979
12289
|
child.stdout.on("data", (d) => {
|
|
@@ -12062,7 +12372,7 @@ function startParallelProbeRuns(input) {
|
|
|
12062
12372
|
}
|
|
12063
12373
|
async function loadModelCache(path) {
|
|
12064
12374
|
try {
|
|
12065
|
-
const raw = await (0,
|
|
12375
|
+
const raw = await (0, import_promises4.readFile)(path, "utf-8");
|
|
12066
12376
|
return JSON.parse(raw);
|
|
12067
12377
|
} catch {
|
|
12068
12378
|
return null;
|
|
@@ -12093,7 +12403,7 @@ async function updateModelCache(path, probes) {
|
|
|
12093
12403
|
}
|
|
12094
12404
|
}
|
|
12095
12405
|
current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
12096
|
-
await (0,
|
|
12406
|
+
await (0, import_promises4.writeFile)(path, JSON.stringify(current, null, 2), "utf-8");
|
|
12097
12407
|
}
|
|
12098
12408
|
async function ocrWorkerPool(input) {
|
|
12099
12409
|
const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
|
|
@@ -12153,7 +12463,7 @@ async function ocrImageWithFallback(input) {
|
|
|
12153
12463
|
async function mergeMarkdownPages(paths) {
|
|
12154
12464
|
const out = [];
|
|
12155
12465
|
for (let i = 0; i < paths.length; i++) {
|
|
12156
|
-
const txt = (await (0,
|
|
12466
|
+
const txt = (await (0, import_promises4.readFile)(paths[i], "utf-8")).trim();
|
|
12157
12467
|
if (!txt) continue;
|
|
12158
12468
|
out.push(txt);
|
|
12159
12469
|
}
|
|
@@ -12269,7 +12579,7 @@ async function ocrImageViaNim(input) {
|
|
|
12269
12579
|
throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
|
|
12270
12580
|
}
|
|
12271
12581
|
async function encodeBase64(path) {
|
|
12272
|
-
const b = await (0,
|
|
12582
|
+
const b = await (0, import_promises4.readFile)(path);
|
|
12273
12583
|
return b.toString("base64");
|
|
12274
12584
|
}
|
|
12275
12585
|
function stripCodeFence3(text) {
|
|
@@ -12281,7 +12591,7 @@ async function delay(ms) {
|
|
|
12281
12591
|
await new Promise((resolvePromise) => setTimeout(resolvePromise, ms));
|
|
12282
12592
|
}
|
|
12283
12593
|
function ensureSupportedInput(path) {
|
|
12284
|
-
const ext = (0,
|
|
12594
|
+
const ext = (0, import_path6.extname)(path).toLowerCase();
|
|
12285
12595
|
const allowed = /* @__PURE__ */ new Set([".pdf", ".hwp", ".hwpx", ".docx", ".xlsx"]);
|
|
12286
12596
|
if (!allowed.has(ext)) {
|
|
12287
12597
|
throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uC785\uB825 \uD3EC\uB9F7: ${ext}`);
|
|
@@ -12289,6 +12599,16 @@ function ensureSupportedInput(path) {
|
|
|
12289
12599
|
}
|
|
12290
12600
|
function normalizePipelineError(err, stage) {
|
|
12291
12601
|
if (err instanceof UnifiedOcrError) return err;
|
|
12602
|
+
if (err instanceof ConvertError) {
|
|
12603
|
+
const codeMap = {
|
|
12604
|
+
SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
|
|
12605
|
+
CONVERT_FAILED: "CONVERT_FAILED",
|
|
12606
|
+
TIMEOUT: "CONVERT_FAILED",
|
|
12607
|
+
UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
|
|
12608
|
+
UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
|
|
12609
|
+
};
|
|
12610
|
+
return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
|
|
12611
|
+
}
|
|
12292
12612
|
const message = err instanceof Error ? err.message : String(err);
|
|
12293
12613
|
const codeByStage = {
|
|
12294
12614
|
convert: "CONVERT_FAILED",
|
|
@@ -12308,7 +12628,7 @@ async function parse2(input, options) {
|
|
|
12308
12628
|
let buffer;
|
|
12309
12629
|
if (typeof input === "string") {
|
|
12310
12630
|
try {
|
|
12311
|
-
const buf = await (0,
|
|
12631
|
+
const buf = await (0, import_promises5.readFile)(input);
|
|
12312
12632
|
buffer = toArrayBuffer(buf);
|
|
12313
12633
|
} catch (err) {
|
|
12314
12634
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|