@clazic/kordoc 2.5.2 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/{chunk-5CILZHRW.js → chunk-4X5JCZFZ.js} +2 -2
- package/dist/{chunk-25ZYYLVP.js → chunk-BZPZXI66.js} +441 -6
- package/dist/chunk-BZPZXI66.js.map +1 -0
- package/dist/cli.js +87 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +649 -157
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +127 -2
- package/dist/index.d.ts +127 -2
- package/dist/index.js +639 -150
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +43 -2
- package/dist/mcp.js.map +1 -1
- package/dist/{utils-H2BL5GNR.js → utils-56QT5C33.js} +2 -2
- package/dist/{watch-D6ODQLPJ.js → watch-HRNMJWSE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-25ZYYLVP.js.map +0 -1
- /package/dist/{chunk-5CILZHRW.js.map → chunk-4X5JCZFZ.js.map} +0 -0
- /package/dist/{utils-H2BL5GNR.js.map → utils-56QT5C33.js.map} +0 -0
- /package/dist/{watch-D6ODQLPJ.js.map → watch-HRNMJWSE.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -33,6 +33,118 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
33
33
|
));
|
|
34
34
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
35
35
|
|
|
36
|
+
// src/utils.ts
|
|
37
|
+
var utils_exports = {};
|
|
38
|
+
__export(utils_exports, {
|
|
39
|
+
KordocError: () => KordocError,
|
|
40
|
+
VERSION: () => VERSION,
|
|
41
|
+
classifyError: () => classifyError,
|
|
42
|
+
isPathTraversal: () => isPathTraversal,
|
|
43
|
+
normalizeKordocError: () => normalizeKordocError,
|
|
44
|
+
precheckZipSize: () => precheckZipSize,
|
|
45
|
+
sanitizeError: () => sanitizeError,
|
|
46
|
+
sanitizeHref: () => sanitizeHref,
|
|
47
|
+
toArrayBuffer: () => toArrayBuffer
|
|
48
|
+
});
|
|
49
|
+
/**
 * Returns an ArrayBuffer containing exactly the bytes visible through `buf`.
 *
 * When the view spans its whole backing buffer that buffer is returned as-is
 * (no copy); otherwise the viewed region is copied into a new ArrayBuffer.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf - typed array or Node Buffer.
 * @returns {ArrayBuffer} the backing buffer itself, or a copy of the viewed slice.
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const spansWholeBacking = start === 0 && size === backing.byteLength;
  return spansWholeBacking ? backing : backing.slice(start, start + size);
}
|
|
55
|
+
/**
 * Produces a message that is safe to show to callers.
 *
 * Only messages of KordocError instances are passed through; anything else is
 * replaced by a fixed generic message so internal details are not exposed.
 *
 * @param {unknown} err - the caught value.
 * @returns {string} the KordocError message or the generic fallback text.
 */
function sanitizeError(err) {
  const isKnownError = err instanceof KordocError;
  return isKnownError ? err.message : "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
}
|
|
59
|
+
/**
 * Reports whether an archive entry name could escape the extraction root.
 *
 * A name is rejected when it contains a NUL character, contains ".." anywhere
 * (after backslashes are normalised to "/"), is absolute ("/..."), or starts
 * with a drive letter ("C:").
 *
 * @param {string} name - entry name as stored in the archive.
 * @returns {boolean} true when the name must be treated as unsafe.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) return true;
  const unixStyle = name.split("\\").join("/");
  if (unixStyle.includes("..")) return true;
  if (unixStyle.startsWith("/")) return true;
  return /^[A-Za-z]:/.test(unixStyle);
}
|
|
64
|
+
/**
 * Inspects a ZIP central directory, without decompressing anything, to reject
 * archives that declare too many entries or too much uncompressed data.
 *
 * @param {ArrayBuffer} buffer - raw ZIP bytes.
 * @param {number} [maxUncompressedSize=104857600] - limit on the sum of declared uncompressed sizes, in bytes.
 * @param {number} [maxEntries=500] - limit on the entry count declared in the end-of-central-directory record.
 * @returns {{totalUncompressed: number, entryCount: number}} declared totals; zeros when the
 *   structure cannot be read (no EOCD record, or any non-KordocError failure while parsing).
 * @throws {KordocError} when either limit is exceeded.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 101010256; // 0x06054b50
  const CENTRAL_HEADER_SIGNATURE = 33639248; // 0x02014b50
  const EOCD_MIN_SIZE = 22;
  const CENTRAL_HEADER_SIZE = 46;
  try {
    const view = new DataView(buffer);
    const size = buffer.byteLength;

    // Scan backwards for the EOCD record; it sits within the last
    // 22 + 65535 bytes because the trailing comment is at most 65535 bytes.
    const scanFloor = Math.max(0, size - 65557);
    let eocdAt = -1;
    let cursor = size - EOCD_MIN_SIZE;
    while (cursor >= scanFloor) {
      if (view.getUint32(cursor, true) === EOCD_SIGNATURE) {
        eocdAt = cursor;
        break;
      }
      cursor -= 1;
    }
    if (eocdAt < 0) return { totalUncompressed: 0, entryCount: 0 };

    const entryCount = view.getUint16(eocdAt + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }

    const directorySize = view.getUint32(eocdAt + 12, true);
    const directoryStart = view.getUint32(eocdAt + 16, true);
    const directoryEnd = directoryStart + directorySize;
    if (directoryEnd > size) return { totalUncompressed: 0, entryCount };

    // Walk the central directory headers and add up the declared sizes.
    let totalUncompressed = 0;
    let headerAt = directoryStart;
    let visited = 0;
    while (visited < entryCount && headerAt + CENTRAL_HEADER_SIZE <= directoryEnd) {
      if (view.getUint32(headerAt, true) !== CENTRAL_HEADER_SIGNATURE) break;
      totalUncompressed += view.getUint32(headerAt + 24, true);
      const variableLength =
        view.getUint16(headerAt + 28, true) + // file name
        view.getUint16(headerAt + 30, true) + // extra field
        view.getUint16(headerAt + 32, true); // comment
      headerAt += CENTRAL_HEADER_SIZE + variableLength;
      visited += 1;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations propagate; structural read errors mean "nothing learned".
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
|
|
102
|
+
/**
 * Validates a hyperlink target against the allow-list in SAFE_HREF_RE.
 *
 * @param {string} href - raw link target.
 * @returns {string | null} the trimmed href when it is non-empty and matches
 *   SAFE_HREF_RE, otherwise null.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  const isAllowed = candidate !== "" && SAFE_HREF_RE.test(candidate);
  return isAllowed ? candidate : null;
}
|
|
107
|
+
/**
 * Maps an error to a kordoc error code by looking for known phrases in its
 * message.
 *
 * @param {unknown} err - the caught value.
 * @returns {string} one of "ENCRYPTED", "DRM_PROTECTED", "ZIP_BOMB",
 *   "DECOMPRESSION_BOMB", "IMAGE_BASED_PDF", "NO_SECTIONS", "CORRUPTED" or
 *   "PARSE_ERROR" (the default, also used for non-Error values).
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const mentions = (...needles) => needles.some((needle) => text.includes(needle));
  // Ordered rules: the first match wins, so the specific ZIP phrases must be
  // tested before the generic "bomb" / size-exceeded phrases.
  const rules = [
    ["ENCRYPTED", () => mentions("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => mentions("DRM")],
    ["ZIP_BOMB", () => mentions("ZIP bomb", "ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC", "ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => mentions("bomb", "\uD06C\uAE30 \uCD08\uACFC", "\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => mentions("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => mentions("\uC139\uC158") && mentions("\uCC3E\uC744 \uC218 \uC5C6", "\uC5C6\uC74C")],
    ["CORRUPTED", () => mentions("\uC2DC\uADF8\uB2C8\uCC98", "\uBCF5\uAD6C\uD560 \uC218 \uC5C6")]
  ];
  const hit = rules.find(([, matches]) => matches());
  return hit ? hit[0] : "PARSE_ERROR";
}
|
|
119
|
+
/**
 * Converts any thrown value into a KordocError that carries a code and stage.
 *
 * An existing KordocError is returned as the same instance; its missing
 * `stage` / `code` are filled in place. Other Error instances are wrapped with
 * their own message (or `fallbackMessage` when empty) and a code obtained from
 * `classifyError`. Non-Error values use `fallbackMessage` and `fallbackCode`.
 *
 * @param {unknown} err - the caught value.
 * @param {string} fallbackMessage - message used when `err` provides none.
 * @param {string} [stage="unknown"] - pipeline stage to record.
 * @param {string} [fallbackCode="PARSE_ERROR"] - code used when none can be derived.
 * @returns {KordocError} the normalised error.
 */
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
  if (err instanceof KordocError) {
    err.stage ||= stage;
    err.code ||= fallbackCode;
    return err;
  }
  if (!(err instanceof Error)) {
    return new KordocError(fallbackMessage, { code: fallbackCode, stage });
  }
  const text = err.message;
  const code = classifyError(err);
  return new KordocError(text || fallbackMessage, { code, stage });
}
|
|
129
|
+
// Module-level bindings for src/utils.ts; they stay undefined until
// init_utils() has run.
var VERSION, KordocError, SAFE_HREF_RE;
var init_utils = __esm({
  "src/utils.ts"() {
    "use strict";
    // Version string baked in at build time.
    VERSION = "2.6.0";
    // Library error type carrying an optional machine-readable code and the
    // pipeline stage at which the failure happened.
    KordocError = class extends Error {
      code;
      stage;
      /**
       * @param {string} message - human-readable description.
       * @param {{code?: string, stage?: string}} [opts] - optional classification.
       */
      constructor(message, opts = {}) {
        super(message);
        const { code, stage } = opts;
        this.name = "KordocError";
        this.code = code;
        this.stage = stage;
      }
    };
    // Allow-list of link schemes (plus in-document "#" anchors) used by sanitizeHref.
    SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
  }
});
|
|
147
|
+
|
|
36
148
|
// src/page-range.ts
|
|
37
149
|
var page_range_exports = {};
|
|
38
150
|
__export(page_range_exports, {
|
|
@@ -3062,6 +3174,9 @@ __export(index_exports, {
|
|
|
3062
3174
|
VERSION: () => VERSION,
|
|
3063
3175
|
blocksToMarkdown: () => blocksToMarkdown,
|
|
3064
3176
|
compare: () => compare,
|
|
3177
|
+
convertHwpToPdf: () => convertHwpToPdf,
|
|
3178
|
+
convertHwpxToPdf: () => convertHwpxToPdf,
|
|
3179
|
+
convertToPdf: () => convertToPdf,
|
|
3065
3180
|
detectFormat: () => detectFormat,
|
|
3066
3181
|
detectZipFormat: () => detectZipFormat,
|
|
3067
3182
|
diffBlocks: () => diffBlocks,
|
|
@@ -3081,7 +3196,7 @@ __export(index_exports, {
|
|
|
3081
3196
|
runUnifiedOcrPipeline: () => runUnifiedOcrPipeline
|
|
3082
3197
|
});
|
|
3083
3198
|
module.exports = __toCommonJS(index_exports);
|
|
3084
|
-
var
|
|
3199
|
+
var import_promises5 = require("fs/promises");
|
|
3085
3200
|
|
|
3086
3201
|
// src/detect.ts
|
|
3087
3202
|
var import_jszip = __toESM(require("jszip"), 1);
|
|
@@ -3133,97 +3248,8 @@ async function detectZipFormat(buffer) {
|
|
|
3133
3248
|
var import_jszip2 = __toESM(require("jszip"), 1);
|
|
3134
3249
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3135
3250
|
|
|
3136
|
-
// src/utils.ts
|
|
3137
|
-
var VERSION = true ? "2.5.1" : "0.0.0-dev";
|
|
3138
|
-
/**
 * Returns an ArrayBuffer containing exactly the bytes visible through `buf`.
 *
 * When the view spans its whole backing buffer that buffer is returned as-is
 * (no copy); otherwise the viewed region is copied into a new ArrayBuffer.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf - typed array or Node Buffer.
 * @returns {ArrayBuffer} the backing buffer itself, or a copy of the viewed slice.
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const spansWholeBacking = start === 0 && size === backing.byteLength;
  return spansWholeBacking ? backing : backing.slice(start, start + size);
}
|
|
3144
|
-
/**
 * Library error type carrying an optional machine-readable code and the
 * pipeline stage at which the failure happened.
 */
var KordocError = class extends Error {
  code;
  stage;
  /**
   * @param {string} message - human-readable description.
   * @param {{code?: string, stage?: string}} [opts] - optional classification.
   */
  constructor(message, opts = {}) {
    super(message);
    const { code, stage } = opts;
    this.name = "KordocError";
    this.code = code;
    this.stage = stage;
  }
};
|
|
3154
|
-
/**
 * Reports whether an archive entry name could escape the extraction root.
 *
 * A name is rejected when it contains a NUL character, contains ".." anywhere
 * (after backslashes are normalised to "/"), is absolute ("/..."), or starts
 * with a drive letter ("C:").
 *
 * @param {string} name - entry name as stored in the archive.
 * @returns {boolean} true when the name must be treated as unsafe.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) return true;
  const unixStyle = name.split("\\").join("/");
  if (unixStyle.includes("..")) return true;
  if (unixStyle.startsWith("/")) return true;
  return /^[A-Za-z]:/.test(unixStyle);
}
|
|
3159
|
-
/**
 * Inspects a ZIP central directory, without decompressing anything, to reject
 * archives that declare too many entries or too much uncompressed data.
 *
 * @param {ArrayBuffer} buffer - raw ZIP bytes.
 * @param {number} [maxUncompressedSize=104857600] - limit on the sum of declared uncompressed sizes, in bytes.
 * @param {number} [maxEntries=500] - limit on the entry count declared in the end-of-central-directory record.
 * @returns {{totalUncompressed: number, entryCount: number}} declared totals; zeros when the
 *   structure cannot be read (no EOCD record, or any non-KordocError failure while parsing).
 * @throws {KordocError} when either limit is exceeded.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 101010256; // 0x06054b50
  const CENTRAL_HEADER_SIGNATURE = 33639248; // 0x02014b50
  const EOCD_MIN_SIZE = 22;
  const CENTRAL_HEADER_SIZE = 46;
  try {
    const view = new DataView(buffer);
    const size = buffer.byteLength;

    // Scan backwards for the EOCD record; it sits within the last
    // 22 + 65535 bytes because the trailing comment is at most 65535 bytes.
    const scanFloor = Math.max(0, size - 65557);
    let eocdAt = -1;
    let cursor = size - EOCD_MIN_SIZE;
    while (cursor >= scanFloor) {
      if (view.getUint32(cursor, true) === EOCD_SIGNATURE) {
        eocdAt = cursor;
        break;
      }
      cursor -= 1;
    }
    if (eocdAt < 0) return { totalUncompressed: 0, entryCount: 0 };

    const entryCount = view.getUint16(eocdAt + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }

    const directorySize = view.getUint32(eocdAt + 12, true);
    const directoryStart = view.getUint32(eocdAt + 16, true);
    const directoryEnd = directoryStart + directorySize;
    if (directoryEnd > size) return { totalUncompressed: 0, entryCount };

    // Walk the central directory headers and add up the declared sizes.
    let totalUncompressed = 0;
    let headerAt = directoryStart;
    let visited = 0;
    while (visited < entryCount && headerAt + CENTRAL_HEADER_SIZE <= directoryEnd) {
      if (view.getUint32(headerAt, true) !== CENTRAL_HEADER_SIGNATURE) break;
      totalUncompressed += view.getUint32(headerAt + 24, true);
      const variableLength =
        view.getUint16(headerAt + 28, true) + // file name
        view.getUint16(headerAt + 30, true) + // extra field
        view.getUint16(headerAt + 32, true); // comment
      headerAt += CENTRAL_HEADER_SIZE + variableLength;
      visited += 1;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations propagate; structural read errors mean "nothing learned".
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
|
|
3197
|
-
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
3198
|
-
/**
 * Validates a hyperlink target against the allow-list in SAFE_HREF_RE.
 *
 * @param {string} href - raw link target.
 * @returns {string | null} the trimmed href when it is non-empty and matches
 *   SAFE_HREF_RE, otherwise null.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  const isAllowed = candidate !== "" && SAFE_HREF_RE.test(candidate);
  return isAllowed ? candidate : null;
}
|
|
3203
|
-
/**
 * Maps an error to a kordoc error code by looking for known phrases in its
 * message.
 *
 * @param {unknown} err - the caught value.
 * @returns {string} one of "ENCRYPTED", "DRM_PROTECTED", "ZIP_BOMB",
 *   "DECOMPRESSION_BOMB", "IMAGE_BASED_PDF", "NO_SECTIONS", "CORRUPTED" or
 *   "PARSE_ERROR" (the default, also used for non-Error values).
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const mentions = (...needles) => needles.some((needle) => text.includes(needle));
  // Ordered rules: the first match wins, so the specific ZIP phrases must be
  // tested before the generic "bomb" / size-exceeded phrases.
  const rules = [
    ["ENCRYPTED", () => mentions("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => mentions("DRM")],
    ["ZIP_BOMB", () => mentions("ZIP bomb", "ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC", "ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => mentions("bomb", "\uD06C\uAE30 \uCD08\uACFC", "\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => mentions("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => mentions("\uC139\uC158") && mentions("\uCC3E\uC744 \uC218 \uC5C6", "\uC5C6\uC74C")],
    ["CORRUPTED", () => mentions("\uC2DC\uADF8\uB2C8\uCC98", "\uBCF5\uAD6C\uD560 \uC218 \uC5C6")]
  ];
  const hit = rules.find(([, matches]) => matches());
  return hit ? hit[0] : "PARSE_ERROR";
}
|
|
3215
|
-
/**
 * Converts any thrown value into a KordocError that carries a code and stage.
 *
 * An existing KordocError is returned as the same instance; its missing
 * `stage` / `code` are filled in place. Other Error instances are wrapped with
 * their own message (or `fallbackMessage` when empty) and a code obtained from
 * `classifyError`. Non-Error values use `fallbackMessage` and `fallbackCode`.
 *
 * @param {unknown} err - the caught value.
 * @param {string} fallbackMessage - message used when `err` provides none.
 * @param {string} [stage="unknown"] - pipeline stage to record.
 * @param {string} [fallbackCode="PARSE_ERROR"] - code used when none can be derived.
 * @returns {KordocError} the normalised error.
 */
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
  if (err instanceof KordocError) {
    err.stage ||= stage;
    err.code ||= fallbackCode;
    return err;
  }
  if (!(err instanceof Error)) {
    return new KordocError(fallbackMessage, { code: fallbackCode, stage });
  }
  const text = err.message;
  const code = classifyError(err);
  return new KordocError(text || fallbackMessage, { code, stage });
}
|
|
3225
|
-
|
|
3226
3251
|
// src/table/builder.ts
|
|
3252
|
+
init_utils();
|
|
3227
3253
|
var MAX_COLS = 200;
|
|
3228
3254
|
var MAX_ROWS = 1e4;
|
|
3229
3255
|
function buildTable(rows) {
|
|
@@ -3483,6 +3509,8 @@ var HEADING_RATIO_H2 = 1.3;
|
|
|
3483
3509
|
var HEADING_RATIO_H3 = 1.15;
|
|
3484
3510
|
|
|
3485
3511
|
// src/hwpx/parser.ts
|
|
3512
|
+
init_utils();
|
|
3513
|
+
init_utils();
|
|
3486
3514
|
init_page_range();
|
|
3487
3515
|
init_logger();
|
|
3488
3516
|
var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
|
|
@@ -4325,6 +4353,7 @@ function extractTextFromNode(node) {
|
|
|
4325
4353
|
|
|
4326
4354
|
// src/hwp5/record.ts
|
|
4327
4355
|
var import_zlib = require("zlib");
|
|
4356
|
+
init_utils();
|
|
4328
4357
|
var TAG_PARA_HEADER = 66;
|
|
4329
4358
|
var TAG_PARA_TEXT = 67;
|
|
4330
4359
|
var TAG_CHAR_SHAPE = 68;
|
|
@@ -5374,6 +5403,7 @@ function parseLenientCfb(data) {
|
|
|
5374
5403
|
}
|
|
5375
5404
|
|
|
5376
5405
|
// src/hwp5/parser.ts
|
|
5406
|
+
init_utils();
|
|
5377
5407
|
init_page_range();
|
|
5378
5408
|
init_logger();
|
|
5379
5409
|
var CFB = __toESM(require_cfb(), 1);
|
|
@@ -6029,6 +6059,7 @@ function arrangeCells(rows, cols, cells) {
|
|
|
6029
6059
|
}
|
|
6030
6060
|
|
|
6031
6061
|
// src/pdf/parser.ts
|
|
6062
|
+
init_utils();
|
|
6032
6063
|
init_page_range();
|
|
6033
6064
|
var import_module = require("module");
|
|
6034
6065
|
var import_path4 = require("path");
|
|
@@ -7922,6 +7953,7 @@ function mergeKoreanLines(text) {
|
|
|
7922
7953
|
// src/xlsx/parser.ts
|
|
7923
7954
|
var import_jszip3 = __toESM(require("jszip"), 1);
|
|
7924
7955
|
var import_xmldom2 = require("@xmldom/xmldom");
|
|
7956
|
+
init_utils();
|
|
7925
7957
|
init_logger();
|
|
7926
7958
|
var MAX_SHEETS = 100;
|
|
7927
7959
|
var MAX_DECOMPRESS_SIZE3 = 500 * 1024 * 1024;
|
|
@@ -8250,6 +8282,7 @@ async function parseXlsxDocument(buffer, options, existingZip) {
|
|
|
8250
8282
|
// src/docx/parser.ts
|
|
8251
8283
|
var import_jszip4 = __toESM(require("jszip"), 1);
|
|
8252
8284
|
var import_xmldom3 = require("@xmldom/xmldom");
|
|
8285
|
+
init_utils();
|
|
8253
8286
|
init_logger();
|
|
8254
8287
|
var MAX_DECOMPRESS_SIZE4 = 500 * 1024 * 1024;
|
|
8255
8288
|
function getChildElements(parent, localName) {
|
|
@@ -8729,6 +8762,7 @@ async function parseDocxDocument(buffer, options, existingZip) {
|
|
|
8729
8762
|
}
|
|
8730
8763
|
|
|
8731
8764
|
// src/index.ts
|
|
8765
|
+
init_utils();
|
|
8732
8766
|
init_cli_provider();
|
|
8733
8767
|
init_markdown_to_blocks();
|
|
8734
8768
|
init_logger();
|
|
@@ -11230,6 +11264,481 @@ async function markdownToXlsx(markdown, options) {
|
|
|
11230
11264
|
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
|
11231
11265
|
}
|
|
11232
11266
|
|
|
11267
|
+
// src/convert/index.ts
|
|
11268
|
+
var import_promises3 = require("fs/promises");
|
|
11269
|
+
init_utils();
|
|
11270
|
+
|
|
11271
|
+
// src/convert/libreoffice.ts
|
|
11272
|
+
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
11273
|
+
|
|
11274
|
+
// src/convert/error.ts
|
|
11275
|
+
/**
 * Error raised by the PDF conversion pipeline.
 * `code` is a machine-readable identifier such as "SOFFICE_NOT_FOUND".
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code - machine-readable error code.
   * @param {string} message - human-readable description.
   */
  constructor(code, message) {
    super(message);
    Object.assign(this, { code, name: "ConvertError" });
  }
};
|
|
11282
|
+
|
|
11283
|
+
// src/convert/installer.ts
|
|
11284
|
+
var import_os3 = require("os");
|
|
11285
|
+
var import_path5 = require("path");
|
|
11286
|
+
var import_promises2 = require("fs/promises");
|
|
11287
|
+
var import_fs4 = require("fs");
|
|
11288
|
+
var import_child_process4 = require("child_process");
|
|
11289
|
+
var CACHE_DIR = (0, import_path5.join)((0, import_os3.homedir)(), ".cache", "kordoc", "libreoffice");
|
|
11290
|
+
var VERSION_FILE = (0, import_path5.join)(CACHE_DIR, "version");
|
|
11291
|
+
// Per-platform LibreOffice download descriptors, keyed by the `process.platform`
// value that installLibreOffice uses to look them up.
//   url     - installer/archive location handed to downloadWithProgress
//   binPath - location of the soffice executable, joined onto CACHE_DIR after installation
//   sizeMb  - size in MiB, multiplied by 1024 * 1024 and passed as the progress total
//             (round figures — presumably estimates; TODO confirm)
// NOTE(review): every URL points at an x86_64 build of 24.8.4 — confirm whether
// arm64 hosts need separate entries.
var PACKAGES = {
  darwin: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300
  },
  linux: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200
  },
  win32: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350
  }
};
|
|
11308
|
+
/**
 * Checks whether a `soffice` executable can be launched from the system PATH.
 *
 * Runs `soffice --version` directly and treats a zero exit status as success.
 * The previous implementation looked up a `runCommand` helper on the utils
 * module, which does not export one, so the probe always failed and this
 * function always returned null.
 *
 * @returns {Promise<"soffice" | null>} "soffice" when the command runs, otherwise null.
 */
async function findInPath() {
  try {
    await new Promise((resolve4, reject) => {
      // Output is irrelevant here; ignoring stdio avoids filling unread pipes.
      const child = (0, import_child_process4.spawn)("soffice", ["--version"], { stdio: "ignore" });
      // "error" fires when the executable cannot be spawned (e.g. ENOENT).
      child.on("error", reject);
      child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`soffice --version exited with code ${code}`)));
    });
    return "soffice";
  } catch {
    // Any failure simply means "not usable from PATH".
    return null;
  }
}
|
|
11317
|
+
/**
 * Looks for a previously installed LibreOffice inside CACHE_DIR.
 *
 * Checks the `bin/soffice` link created by createSymlink first. Because the
 * Windows installer path never creates that link, the platform's own
 * `binPath` from PACKAGES is checked as a second candidate so a cached install
 * is not downloaded again.
 *
 * @returns {Promise<string | null>} path of the first accessible candidate, or null.
 */
async function findInCache() {
  const candidates = [(0, import_path5.join)(CACHE_DIR, "bin", "soffice")];
  const platformBin = PACKAGES[process.platform]?.binPath;
  if (platformBin) {
    candidates.push((0, import_path5.join)(CACHE_DIR, platformBin));
  }
  for (const candidate of candidates) {
    try {
      await (0, import_promises2.access)(candidate);
      return candidate;
    } catch {
      // Not present (or a dangling link); try the next candidate.
    }
  }
  return null;
}
|
|
11326
|
+
/**
 * Streams `url` into the file `dest`, reporting progress after every chunk.
 *
 * Compared with a plain read/write loop this version:
 *  - rejects non-2xx responses instead of saving an error page as the installer,
 *  - honours write-stream back-pressure,
 *  - surfaces write-stream errors as a rejection instead of an unhandled event,
 *  - resolves only after the file has been completely flushed.
 *
 * @param {string} url - resource to download.
 * @param {string} dest - destination file path.
 * @param {number} totalBytes - expected size; used as the progress total when
 *   the response has no usable Content-Length header.
 * @param {(downloaded: number, total: number) => void} [onProgress] - progress callback.
 * @returns {Promise<void>}
 * @throws {Error} on HTTP failure, a missing body, or a read/write error.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");

  // Prefer the size announced by the server over the caller's estimate.
  const declaredBytes = Number(response.headers.get("content-length"));
  const expectedBytes = Number.isFinite(declaredBytes) && declaredBytes > 0 ? declaredBytes : totalBytes;

  const file = (0, import_fs4.createWriteStream)(dest);
  let streamError = null;
  // A permanent listener keeps a late "error" event from crashing the process.
  file.on("error", (err) => {
    streamError ??= err;
  });
  // Resolves on `event`, rejects if the stream has failed or fails first.
  const settle = (event) => new Promise((resolve4, reject) => {
    if (streamError) {
      reject(streamError);
      return;
    }
    const onEvent = () => {
      file.off("error", onError);
      resolve4();
    };
    const onError = (err) => {
      file.off(event, onEvent);
      reject(err);
    };
    file.once(event, onEvent);
    file.once("error", onError);
  });

  const reader = response.body.getReader();
  let downloaded = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (streamError) throw streamError;
      // write() returning false means the internal buffer is full: wait for "drain".
      if (!file.write(value)) await settle("drain");
      downloaded += value.length;
      onProgress?.(downloaded, expectedBytes);
    }
    const flushed = settle("finish");
    file.end();
    await flushed;
  } catch (err) {
    file.destroy();
    // Best-effort: release the connection; the original failure is what matters.
    await reader.cancel().catch(() => {});
    throw err;
  } finally {
    reader.releaseLock();
  }
}
|
|
11345
|
+
/**
 * Downloads the LibreOffice package for the current platform and runs the
 * matching installer.
 *
 * The platform is validated before anything is downloaded, and the download
 * itself runs inside the cleanup scope so a failed or partial download does
 * not leave a `download-*` file behind in CACHE_DIR.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg - package descriptor.
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} path of the installed soffice binary.
 * @throws {ConvertError} code "UNSUPPORTED_PLATFORM" when no installer exists for this platform.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  let installer;
  switch (platform) {
    case "darwin":
      installer = installMacOS;
      break;
    case "linux":
      installer = installLinux;
      break;
    case "win32":
      installer = installWindows;
      break;
    default:
      throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  await (0, import_promises2.mkdir)(CACHE_DIR, { recursive: true });
  const downloadPath = (0, import_path5.join)(CACHE_DIR, `download-${Date.now()}`);
  try {
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await installer(pkg, downloadPath);
  } catch (err) {
    // Never let a cleanup failure hide the real error.
    await (0, import_promises2.rm)(downloadPath, { force: true }).catch(() => {});
    throw err;
  }
}
|
|
11364
|
+
/**
 * Installs LibreOffice on macOS: mounts the downloaded .dmg, copies
 * LibreOffice.app into CACHE_DIR, detaches the image and links the binary.
 *
 * Spawn failures (for example a missing tool) now reject instead of raising
 * an unhandled "error" event, and an existing copy of the app is removed
 * first so `cp -R` does not nest the new copy inside the old one.
 *
 * @param {{binPath: string}} pkg - package descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .dmg file.
 * @returns {Promise<string>} path returned by createSymlink.
 * @throws {Error} when mounting or copying fails.
 */
async function installMacOS(pkg, downloadPath) {
  // Runs a command and resolves on exit code 0; rejects on spawn error or non-zero exit.
  const run = (command, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)(command, args, { stdio: "ignore" });
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await run("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath], "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328");
  try {
    const appSource = (0, import_path5.join)(mountPoint, "LibreOffice.app");
    const appDest = (0, import_path5.join)(CACHE_DIR, "LibreOffice.app");
    // With an existing destination `cp -R` would create LibreOffice.app/LibreOffice.app.
    await (0, import_promises2.rm)(appDest, { recursive: true, force: true });
    await run("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Detaching is best-effort: a failure here must not mask the copy result.
    await run("hdiutil", ["detach", mountPoint], "dmg detach failed").catch(() => {});
  }
  await (0, import_promises2.rm)(downloadPath, { force: true });
  return await createSymlink((0, import_path5.join)(CACHE_DIR, pkg.binPath));
}
|
|
11386
|
+
/**
 * Installs LibreOffice on Linux: unpacks the downloaded tarball, extracts
 * every .deb package into CACHE_DIR with `dpkg-deb -x`, and links the binary.
 *
 * Extraction failures now propagate instead of being swallowed, so a broken
 * install is no longer reported as success. The DEBS directory is looked up
 * both at the archive root and one level down, and the temporary extraction
 * directory is always removed.
 *
 * @param {{binPath: string}} pkg - package descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .tar.gz file.
 * @returns {Promise<string>} path returned by createSymlink.
 * @throws {Error} when unpacking fails, no DEBS directory exists, or a package cannot be extracted.
 */
async function installLinux(pkg, downloadPath) {
  // Runs a command and resolves on exit code 0; rejects on spawn error or non-zero exit.
  const run = (command, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)(command, args, { stdio: "ignore" });
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const extractDir = (0, import_path5.join)(CACHE_DIR, `extract-${Date.now()}`);
  try {
    await (0, import_promises2.mkdir)(extractDir, { recursive: true });
    await run("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");

    // NOTE(review): the tarball presumably wraps its content in one versioned
    // top-level folder, so DEBS is searched at the root and one level down.
    const candidates = [(0, import_path5.join)(extractDir, "DEBS")];
    for (const entry of await (0, import_promises2.readdir)(extractDir, { withFileTypes: true })) {
      if (entry.isDirectory()) {
        candidates.push((0, import_path5.join)(extractDir, entry.name, "DEBS"));
      }
    }
    let debsDir = null;
    for (const candidate of candidates) {
      try {
        await (0, import_promises2.access)(candidate);
        debsDir = candidate;
        break;
      } catch {
        // Candidate does not exist; try the next one.
      }
    }
    if (debsDir === null) {
      throw new Error("DEBS \uB514\uB809\uD130\uB9AC\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
    }

    const packages = (await (0, import_promises2.readdir)(debsDir)).filter((entry) => entry.endsWith(".deb"));
    for (const entry of packages) {
      await run("dpkg-deb", ["-x", (0, import_path5.join)(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
    }
  } finally {
    await (0, import_promises2.rm)(extractDir, { recursive: true, force: true });
  }
  await (0, import_promises2.rm)(downloadPath, { force: true });
  return await createSymlink((0, import_path5.join)(CACHE_DIR, pkg.binPath));
}
|
|
11411
|
+
/**
 * Installs LibreOffice on Windows by running `msiexec /a` (administrative
 * install) with TARGETDIR set to CACHE_DIR.
 *
 * A spawn failure now rejects the returned promise instead of raising an
 * unhandled "error" event on the child process.
 *
 * @param {{binPath: string}} pkg - package descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .msi file.
 * @returns {Promise<string>} path of soffice.exe inside CACHE_DIR.
 * @throws {Error} when msiexec cannot be started or exits with a non-zero code.
 */
async function installWindows(pkg, downloadPath) {
  await new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`], { stdio: "ignore" });
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
  });
  await (0, import_promises2.rm)(downloadPath, { force: true });
  return (0, import_path5.join)(CACHE_DIR, pkg.binPath);
}
|
|
11419
|
+
/**
 * Publishes the installed binary as `<CACHE_DIR>/bin/soffice` and makes sure
 * that directory is on PATH for the current process.
 *
 * An existing link is replaced so it cannot keep pointing at a previous
 * install, the directory is prepended to PATH only once, and an unset PATH no
 * longer produces the literal entry "undefined".
 *
 * @param {string} actualBin - path of the real soffice executable.
 * @returns {Promise<string>} path of the link inside CACHE_DIR.
 */
async function createSymlink(actualBin) {
  const binDir = (0, import_path5.join)(CACHE_DIR, "bin");
  await (0, import_promises2.mkdir)(binDir, { recursive: true });
  const linkBin = (0, import_path5.join)(binDir, "soffice");
  try {
    // Drop a stale link first; symlink() fails with EEXIST otherwise.
    await (0, import_promises2.rm)(linkBin, { force: true });
    await (0, import_promises2.symlink)(actualBin, linkBin);
  } catch {
    // Linking stays best-effort, as before.
  }
  const currentPath = process.env.PATH;
  const entries = currentPath ? currentPath.split(import_path5.delimiter) : [];
  if (!entries.includes(binDir)) {
    process.env.PATH = currentPath ? `${binDir}${import_path5.delimiter}${currentPath}` : binDir;
  }
  return linkBin;
}
|
|
11430
|
+
/**
 * Installs LibreOffice for the current platform using its PACKAGES entry.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} whatever installForPlatform resolves to (the soffice path).
 * @throws {ConvertError} code "UNSUPPORTED_PLATFORM" when PACKAGES has no entry for this platform.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const pkg = PACKAGES[platform];
  if (pkg) {
    return await installForPlatform(pkg, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
|
|
11441
|
+
/**
 * Finds a usable soffice binary, installing LibreOffice when allowed.
 *
 * Lookup order: system PATH, then the kordoc cache, then (when `autoInstall`
 * is true) a fresh download and install. Progress and outcome are reported
 * through `emitter`.
 *
 * The download percentage is clamped to 0–100 because the total may be only
 * an estimate, and a non-positive total no longer yields NaN or Infinity.
 *
 * @param {{validate: Function, install: Function, error: Function}} emitter - event sink.
 * @param {boolean} [autoInstall=true] - whether a missing LibreOffice may be installed.
 * @returns {Promise<string>} command name or path of soffice.
 * @throws {ConvertError} code "SOFFICE_NOT_FOUND" when nothing is found and `autoInstall` is false.
 * @throws {unknown} any installation failure, rethrown after an "install_failed" event.
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
  const inPath = await findInPath();
  if (inPath) {
    emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
    return inPath;
  }
  const inCache = await findInCache();
  if (inCache) {
    emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
    return inCache;
  }
  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }
  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  try {
    const installed = await installLibreOffice((downloaded, total) => {
      // Keep the reported percentage inside 0–100 even when `total` is off.
      const ratio = total > 0 ? downloaded / total : 0;
      const percent = Math.min(100, Math.max(0, Math.round(ratio * 100)));
      emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
        percent,
        downloadedBytes: downloaded,
        totalBytes: total
      });
    });
    emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
    return installed;
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
    throw err;
  }
}
|
|
11480
|
+
|
|
11481
|
+
// src/convert/libreoffice.ts
|
|
11482
|
+
var libreConvert = import_libreoffice_convert.default.convert;
|
|
11483
|
+
/**
 * Verifies that `soffice --version` can be executed.
 *
 * The command is spawned directly. The previous implementation relied on a
 * `runCommand` helper that the utils module does not export, so it threw
 * SOFFICE_NOT_FOUND even when LibreOffice was installed.
 *
 * @returns {Promise<void>} resolves when soffice exits with status 0.
 * @throws {ConvertError} code "SOFFICE_NOT_FOUND" when soffice cannot be run.
 */
async function assertSofficeAvailable() {
  try {
    await new Promise((resolve4, reject) => {
      // Output is irrelevant here; ignoring stdio avoids filling unread pipes.
      const child = (0, import_child_process4.spawn)("soffice", ["--version"], { stdio: "ignore" });
      // "error" fires when the executable cannot be spawned (e.g. ENOENT).
      child.on("error", reject);
      child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`soffice --version exited with code ${code}`)));
    });
  } catch {
    throw new ConvertError(
      "SOFFICE_NOT_FOUND",
      "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
    );
  }
}
|
|
11494
|
+
/**
 * Converts a document buffer with LibreOffice, failing after `timeoutMs`.
 *
 * The timeout only rejects the returned promise; it does not stop the
 * underlying conversion. The timer is cleared when the converter calls back
 * and also when it throws synchronously, so a failed call no longer leaves a
 * live timer holding the event loop open for up to `timeoutMs`.
 *
 * @param {Buffer} buffer - source document bytes.
 * @param {string} targetExt - target extension passed to the converter (e.g. ".pdf").
 * @param {number} [timeoutMs=60000] - maximum time to wait for the converter callback.
 * @returns {Promise<Buffer>} converted document bytes.
 * @throws {ConvertError} code "TIMEOUT" on timeout, "CONVERT_FAILED" when the
 *   converter reports an error or yields no output.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
  return new Promise((resolve4, reject) => {
    const timer = setTimeout(() => {
      reject(
        new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
      );
    }, timeoutMs);
    try {
      libreConvert(buffer, targetExt, void 0, (err, done) => {
        clearTimeout(timer);
        if (err || !done) {
          reject(
            new ConvertError(
              "CONVERT_FAILED",
              err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
            )
          );
          return;
        }
        resolve4(done);
      });
    } catch (err) {
      // Synchronous failure: release the timer and reject with the same error.
      clearTimeout(timer);
      reject(err);
    }
  });
}
|
|
11516
|
+
|
|
11517
|
+
// src/convert/events.ts
|
|
11518
|
+
/**
 * Single-listener event dispatcher for the conversion pipeline. Every helper
 * builds an event object and forwards it to the registered listener.
 */
var ConvertEventEmitter = class {
  listener = null;
  /** Registers the event listener, replacing any previous one. */
  setListener(listener) {
    this.listener = listener;
  }
  /** Delivers an event; exceptions thrown by the listener are swallowed. */
  emit(event) {
    try {
      this.listener?.(event);
    } catch {
      // A faulty listener must not interrupt the conversion.
    }
  }
  /** Helper: "detect" event. Keys in `meta` are spread last and may override the base fields. */
  detect(stage, message, meta) {
    const event = { type: "detect", stage, message, ...meta };
    this.emit(event);
  }
  /** Helper: "validate" event. */
  validate(stage, message, meta) {
    const event = { type: "validate", stage, message, ...meta };
    this.emit(event);
  }
  /** Helper: "install" event. */
  install(stage, message, meta) {
    const event = { type: "install", stage, message, ...meta };
    this.emit(event);
  }
  /** Helper: conversion progress event. */
  progress(percent, message) {
    const event = { type: "convert", stage: "convert_progress", message, percent };
    this.emit(event);
  }
  /** Helper: conversion started (percent 0). */
  convertStart(message) {
    const event = { type: "convert", stage: "convert_start", message, percent: 0 };
    this.emit(event);
  }
  /** Helper: conversion finished (percent 100). */
  convertDone(message) {
    const event = { type: "convert", stage: "convert_done", message, percent: 100 };
    this.emit(event);
  }
  /** Helper: completion event carrying the final result. */
  complete(result) {
    const event = { type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result };
    this.emit(event);
  }
  /** Helper: error event; always flagged as recoverable. */
  error(stage, code, message, suggestion) {
    const event = { type: "error", stage, code, message, recoverable: true, suggestion };
    this.emit(event);
  }
};
|
|
11564
|
+
|
|
11565
|
+
// src/convert/index.ts
|
|
11566
|
+
// State of the process-wide conversion lock: whether a conversion currently
// holds it, and the callbacks of callers waiting for their turn (FIFO).
var isConverting = false;
var queue = [];
/**
 * Acquires the conversion lock so that only one conversion runs at a time.
 *
 * Resolves immediately when the lock is free; otherwise the caller is queued
 * and resolved once every earlier holder has released. The resolved value is
 * the release function, which hands the lock to the next waiter, if any.
 *
 * @returns {Promise<() => void>} function that releases the lock.
 */
async function acquireConvertLock() {
  // Every holder gets its own release closure.
  const createRelease = () => () => {
    isConverting = false;
    queue.shift()?.();
  };
  if (isConverting) {
    return new Promise((resolve4) => {
      queue.push(() => {
        isConverting = true;
        resolve4(createRelease());
      });
    });
  }
  isConverting = true;
  return createRelease();
}
|
|
11588
|
+
/**
 * Converts an HWP or HWPX document to PDF.
 *
 * Steps: read the input into a Buffer, reject inputs above 500 MiB, detect
 * the format (only "hwp" and "hwpx" are accepted), resolve LibreOffice via
 * `resolveSoffice`, then run `convertBuffer` while holding the module-wide
 * conversion lock.
 *
 * Failures are reported through the returned object
 * (`{ success: false, code, error, stage }`); the surrounding try/catch
 * turns unexpected exceptions into a "PARSE_ERROR" result.
 *
 * @param input - a file path (string, read with `readFile`), a Buffer, or
 *   any value accepted by `Buffer.from`
 * @param options - optional settings:
 *   `onEvent(event)` receives every emitted event;
 *   `onProgress(percent, message)` is the legacy callback and receives only
 *   "convert_progress" events;
 *   `autoInstallLibreOffice` is forwarded to `resolveSoffice` (default true);
 *   `timeoutMs` is forwarded to `convertBuffer`.
 * @returns on success `{ success: true, pdf: Uint8Array, sourceFormat }`,
 *   otherwise the failure object described above
 */
async function convertToPdf(input, options) {
  const emitter = new ConvertEventEmitter();
  const onEvent = options?.onEvent;
  const legacyProgress = options?.onProgress;
  const forwardProgress = legacyProgress ? (event) => {
    if (event.type === "convert" && event.stage === "convert_progress") {
      legacyProgress(event.percent, event.message);
    }
  } : void 0;
  if (onEvent && forwardProgress) {
    // Register ONE listener that fans out to both callbacks. Calling
    // setListener twice would presumably replace the first listener
    // (NOTE(review): setListener is not visible here - confirm), which
    // silently dropped onEvent whenever onProgress was also supplied.
    emitter.setListener((event) => {
      onEvent(event);
      forwardProgress(event);
    });
  } else if (onEvent || forwardProgress) {
    emitter.setListener(onEvent || forwardProgress);
  }
  try {
    emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
    let buffer;
    try {
      if (typeof input === "string") {
        buffer = await (0, import_promises3.readFile)(input);
      } else if (Buffer.isBuffer(input)) {
        buffer = input;
      } else {
        buffer = Buffer.from(input);
      }
    } catch (err) {
      const readError = `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`;
      emitter.error("detect", "PARSE_ERROR", readError);
      return {
        success: false,
        code: "PARSE_ERROR",
        error: readError,
        stage: "detect"
      };
    }
    const MAX_FILE_SIZE = 500 * 1024 * 1024;
    if (buffer.length > MAX_FILE_SIZE) {
      const sizeError = `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`;
      emitter.error("detect", "FILE_TOO_LARGE", sizeError);
      return {
        success: false,
        code: "FILE_TOO_LARGE",
        error: sizeError,
        stage: "detect"
      };
    }
    const format = detectFormat(toArrayBuffer(buffer));
    emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
    if (format !== "hwp" && format !== "hwpx") {
      const formatError = `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`;
      emitter.error("detect", "UNSUPPORTED_FORMAT", formatError);
      return {
        success: false,
        code: "UNSUPPORTED_FORMAT",
        error: formatError,
        stage: "detect"
      };
    }
    emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
    try {
      // Only the availability check (and its events) matters here; the
      // resolved path was never read, so it is no longer stored.
      await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
    } catch (err) {
      if (err instanceof ConvertError) {
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "validate"
        };
      }
      // Anything else is handled by the outer catch below.
      throw err;
    }
    // The lock is taken only after the checks above; it is held just for
    // the conversion itself and always released in `finally`.
    const releaseLock = await acquireConvertLock();
    try {
      emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
      emitter.progress(10, "\uBCC0\uD658 \uC911...");
      const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
      emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
      emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
      const result = {
        success: true,
        pdf: new Uint8Array(pdf),
        sourceFormat: format
      };
      emitter.complete({
        sourceFormat: format,
        pdfSize: pdf.length
      });
      return result;
    } catch (err) {
      if (err instanceof ConvertError) {
        emitter.error("convert", err.code, err.message);
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "convert"
        };
      }
      const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
      const errorCode = classifyError(err);
      emitter.error("convert", errorCode, errorMsg);
      return {
        success: false,
        code: errorCode,
        error: errorMsg,
        stage: "convert"
      };
    } finally {
      releaseLock();
    }
  } catch (unexpectedErr) {
    const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
    emitter.error("convert", "PARSE_ERROR", errorMsg);
    return {
      success: false,
      code: "PARSE_ERROR",
      error: errorMsg,
      stage: "convert"
    };
  }
}
|
|
11714
|
+
/**
 * Converts an HWP 5.x document to PDF, rejecting any other source format.
 *
 * Delegates to `convertToPdf` and then inspects the reported
 * `sourceFormat`. Failed conversions are passed through untouched; a
 * successful conversion whose source was not "hwp" is replaced with an
 * "UNSUPPORTED_FORMAT" failure.
 *
 * NOTE(review): the format is checked only after the conversion has run, so
 * a mismatching input is still fully converted (and its events emitted)
 * before being rejected.
 *
 * @param input - forwarded unchanged to `convertToPdf`
 * @param options - forwarded unchanged to `convertToPdf`
 * @returns the `convertToPdf` result, or the format-mismatch failure object
 */
async function convertHwpToPdf(input, options) {
  const outcome = await convertToPdf(input, options);
  const passThrough = !outcome.success || outcome.sourceFormat === "hwp";
  if (passThrough) {
    return outcome;
  }
  return {
    success: false,
    code: "UNSUPPORTED_FORMAT",
    error: `HWP 5.x \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${outcome.sourceFormat}`,
    stage: "detect"
  };
}
|
|
11726
|
+
/**
 * Converts an HWPX document to PDF, rejecting any other source format.
 *
 * Delegates to `convertToPdf` and then inspects the reported
 * `sourceFormat`. Failed conversions are passed through untouched; a
 * successful conversion whose source was not "hwpx" is replaced with an
 * "UNSUPPORTED_FORMAT" failure.
 *
 * NOTE(review): the format is checked only after the conversion has run, so
 * a mismatching input is still fully converted (and its events emitted)
 * before being rejected.
 *
 * @param input - forwarded unchanged to `convertToPdf`
 * @param options - forwarded unchanged to `convertToPdf`
 * @returns the `convertToPdf` result, or the format-mismatch failure object
 */
async function convertHwpxToPdf(input, options) {
  const outcome = await convertToPdf(input, options);
  const passThrough = !outcome.success || outcome.sourceFormat === "hwpx";
  if (passThrough) {
    return outcome;
  }
  return {
    success: false,
    code: "UNSUPPORTED_FORMAT",
    error: `HWPX \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${outcome.sourceFormat}`,
    stage: "detect"
  };
}
|
|
11738
|
+
|
|
11739
|
+
// src/index.ts
|
|
11740
|
+
init_utils();
|
|
11741
|
+
|
|
11233
11742
|
// src/ocr/api-key-rotation.ts
|
|
11234
11743
|
var AllKeysCoolingDownError = class extends Error {
|
|
11235
11744
|
waitMs;
|
|
@@ -11324,11 +11833,10 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11324
11833
|
};
|
|
11325
11834
|
|
|
11326
11835
|
// src/pipeline/unified-ocr.ts
|
|
11327
|
-
var
|
|
11328
|
-
var
|
|
11329
|
-
var
|
|
11836
|
+
var import_promises4 = require("fs/promises");
|
|
11837
|
+
var import_path6 = require("path");
|
|
11838
|
+
var import_child_process5 = require("child_process");
|
|
11330
11839
|
var import_node_perf_hooks = require("perf_hooks");
|
|
11331
|
-
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
11332
11840
|
init_logger();
|
|
11333
11841
|
|
|
11334
11842
|
// src/pipeline/bounded-queue.ts
|
|
@@ -11390,7 +11898,6 @@ var BoundedQueue = class {
|
|
|
11390
11898
|
};
|
|
11391
11899
|
|
|
11392
11900
|
// src/pipeline/unified-ocr.ts
|
|
11393
|
-
var libreConvert = import_libreoffice_convert.default.convert;
|
|
11394
11901
|
var UnifiedOcrError = class extends Error {
|
|
11395
11902
|
code;
|
|
11396
11903
|
stage;
|
|
@@ -11462,15 +11969,15 @@ function elapsedMs(startAt) {
|
|
|
11462
11969
|
return Math.round(import_node_perf_hooks.performance.now() - startAt);
|
|
11463
11970
|
}
|
|
11464
11971
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11465
|
-
const absInput = (0,
|
|
11466
|
-
const stem = (0,
|
|
11467
|
-
const workspaceDir = (0,
|
|
11468
|
-
const imagesDir = (0,
|
|
11469
|
-
const rawDir = (0,
|
|
11470
|
-
const diffDir = (0,
|
|
11471
|
-
const outputPath = (0,
|
|
11472
|
-
const reportPath = (0,
|
|
11473
|
-
const modelCachePath = (0,
|
|
11972
|
+
const absInput = (0, import_path6.resolve)(inputPath);
|
|
11973
|
+
const stem = (0, import_path6.basename)(absInput, (0, import_path6.extname)(absInput));
|
|
11974
|
+
const workspaceDir = (0, import_path6.resolve)(options.workspaceDir ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}_ocr_workspace`));
|
|
11975
|
+
const imagesDir = (0, import_path6.join)(workspaceDir, "images");
|
|
11976
|
+
const rawDir = (0, import_path6.join)(workspaceDir, "ocr", "raw");
|
|
11977
|
+
const diffDir = (0, import_path6.join)(workspaceDir, "ocr", "diff");
|
|
11978
|
+
const outputPath = (0, import_path6.resolve)(options.outputPath ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}.md`));
|
|
11979
|
+
const reportPath = (0, import_path6.join)(workspaceDir, "run-report.json");
|
|
11980
|
+
const modelCachePath = (0, import_path6.join)((0, import_path6.dirname)(absInput), ".kordoc-model-cache.json");
|
|
11474
11981
|
const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
|
|
11475
11982
|
const timeoutMs = options.timeoutMs ?? 6e4;
|
|
11476
11983
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
@@ -11484,9 +11991,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11484
11991
|
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
11485
11992
|
const runId = options.runId ?? generateRunId("ocr");
|
|
11486
11993
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
11487
|
-
await (0,
|
|
11488
|
-
await (0,
|
|
11489
|
-
await (0,
|
|
11994
|
+
await (0, import_promises4.mkdir)(imagesDir, { recursive: true });
|
|
11995
|
+
await (0, import_promises4.mkdir)(rawDir, { recursive: true });
|
|
11996
|
+
await (0, import_promises4.mkdir)(diffDir, { recursive: true });
|
|
11490
11997
|
const timingsMs = {};
|
|
11491
11998
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
11492
11999
|
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
@@ -11502,12 +12009,12 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11502
12009
|
currentStage = "convert";
|
|
11503
12010
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11504
12011
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
11505
|
-
if ((0,
|
|
12012
|
+
if ((0, import_path6.extname)(absInput).toLowerCase() !== ".pdf") {
|
|
11506
12013
|
await assertSofficeAvailable();
|
|
11507
|
-
workingPdfPath = (0,
|
|
11508
|
-
const inputBuffer = await (0,
|
|
11509
|
-
const out = await
|
|
11510
|
-
await (0,
|
|
12014
|
+
workingPdfPath = (0, import_path6.join)(workspaceDir, `${stem}.pdf`);
|
|
12015
|
+
const inputBuffer = await (0, import_promises4.readFile)(absInput);
|
|
12016
|
+
const out = await convertBuffer(inputBuffer, ".pdf");
|
|
12017
|
+
await (0, import_promises4.writeFile)(workingPdfPath, out);
|
|
11511
12018
|
}
|
|
11512
12019
|
timingsMs.convert = elapsedMs(convertStart);
|
|
11513
12020
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
@@ -11518,10 +12025,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11518
12025
|
if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11519
12026
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11520
12027
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
|
|
11521
|
-
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0,
|
|
11522
|
-
const firstFiles = (await (0,
|
|
12028
|
+
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0, import_path6.join)(imagesDir, "page")]);
|
|
12029
|
+
const firstFiles = (await (0, import_promises4.readdir)(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11523
12030
|
if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
|
|
11524
|
-
const probeImage = (0,
|
|
12031
|
+
const probeImage = (0, import_path6.join)(imagesDir, firstFiles[0]);
|
|
11525
12032
|
markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
|
|
11526
12033
|
const probeStart = import_node_perf_hooks.performance.now();
|
|
11527
12034
|
currentStage = "probe";
|
|
@@ -11557,7 +12064,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11557
12064
|
const keyCount = keyPool.snapshot().length;
|
|
11558
12065
|
const workerCount = Math.max(1, keyCount * concurrencyPerKey);
|
|
11559
12066
|
const queueCapacity = workerCount * 2;
|
|
11560
|
-
const
|
|
12067
|
+
const queue2 = new BoundedQueue(queueCapacity);
|
|
11561
12068
|
const ocrStart = import_node_perf_hooks.performance.now();
|
|
11562
12069
|
currentStage = "ocr";
|
|
11563
12070
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (\uC6CC\uCEE4 ${workerCount}\uAC1C)`);
|
|
@@ -11565,17 +12072,17 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11565
12072
|
let renderDone = 1;
|
|
11566
12073
|
const renderProducer = (async () => {
|
|
11567
12074
|
try {
|
|
11568
|
-
await
|
|
12075
|
+
await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
|
|
11569
12076
|
if (totalPages > 1) {
|
|
11570
|
-
for await (const item of renderPdfToPngStream(workingPdfPath, (0,
|
|
11571
|
-
await
|
|
12077
|
+
for await (const item of renderPdfToPngStream(workingPdfPath, (0, import_path6.join)(imagesDir, "page"), dpi, totalPages, 2)) {
|
|
12078
|
+
await queue2.enqueue(item);
|
|
11572
12079
|
renderDone++;
|
|
11573
12080
|
markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
|
|
11574
12081
|
logStage("debug", "render", "progress", "\uD398\uC774\uC9C0 \uB80C\uB354 \uC644\uB8CC", { page: item.pageNumber });
|
|
11575
12082
|
}
|
|
11576
12083
|
}
|
|
11577
12084
|
} finally {
|
|
11578
|
-
|
|
12085
|
+
queue2.close();
|
|
11579
12086
|
timingsMs.render = elapsedMs(renderStart);
|
|
11580
12087
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
11581
12088
|
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: renderDone, elapsedMs: timingsMs.render });
|
|
@@ -11584,7 +12091,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11584
12091
|
const [, pageResultsMap] = await Promise.all([
|
|
11585
12092
|
renderProducer,
|
|
11586
12093
|
ocrWorkerPool({
|
|
11587
|
-
queue,
|
|
12094
|
+
queue: queue2,
|
|
11588
12095
|
workerCount,
|
|
11589
12096
|
totalPages,
|
|
11590
12097
|
ocrInput: {
|
|
@@ -11617,8 +12124,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11617
12124
|
const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
|
|
11618
12125
|
const rawPagePaths = [];
|
|
11619
12126
|
for (const [pageNum, markdown] of sortedEntries) {
|
|
11620
|
-
const pagePath = (0,
|
|
11621
|
-
await (0,
|
|
12127
|
+
const pagePath = (0, import_path6.join)(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
|
|
12128
|
+
await (0, import_promises4.writeFile)(pagePath, markdown, "utf-8");
|
|
11622
12129
|
rawPagePaths.push(pagePath);
|
|
11623
12130
|
}
|
|
11624
12131
|
const mergeStart = import_node_perf_hooks.performance.now();
|
|
@@ -11626,7 +12133,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11626
12133
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11627
12134
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11628
12135
|
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
11629
|
-
await (0,
|
|
12136
|
+
await (0, import_promises4.writeFile)(outputPath, merged, "utf-8");
|
|
11630
12137
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
11631
12138
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11632
12139
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
@@ -11642,7 +12149,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11642
12149
|
timingsMs,
|
|
11643
12150
|
modelCachePath
|
|
11644
12151
|
};
|
|
11645
|
-
await (0,
|
|
12152
|
+
await (0, import_promises4.writeFile)(reportPath, JSON.stringify(report, null, 2), "utf-8");
|
|
11646
12153
|
logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
11647
12154
|
return { outputPath, reportPath, selectedModel };
|
|
11648
12155
|
} catch (err) {
|
|
@@ -11704,17 +12211,6 @@ function emitProgress(cb, stage, stagePercent, weights, extra) {
|
|
|
11704
12211
|
model: extra.model
|
|
11705
12212
|
});
|
|
11706
12213
|
}
|
|
11707
|
-
async function convertWithLibreOffice(buffer, ext) {
|
|
11708
|
-
return await new Promise((resolvePromise, reject) => {
|
|
11709
|
-
libreConvert(buffer, ext, void 0, (err, done) => {
|
|
11710
|
-
if (err || !done) {
|
|
11711
|
-
reject(new UnifiedOcrError("CONVERT_FAILED", "convert", err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"));
|
|
11712
|
-
return;
|
|
11713
|
-
}
|
|
11714
|
-
resolvePromise(done);
|
|
11715
|
-
});
|
|
11716
|
-
});
|
|
11717
|
-
}
|
|
11718
12214
|
async function getPdfPageCount(pdfPath) {
|
|
11719
12215
|
const stdout = await runCommandWithStdout("pdfinfo", [pdfPath]);
|
|
11720
12216
|
const m = stdout.match(/^\s*Pages:\s*(\d+)\s*$/mi);
|
|
@@ -11728,7 +12224,7 @@ async function getPdfPageCount(pdfPath) {
|
|
|
11728
12224
|
return n;
|
|
11729
12225
|
}
|
|
11730
12226
|
async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, startPage = 1) {
|
|
11731
|
-
const imagesDir = (0,
|
|
12227
|
+
const imagesDir = (0, import_path6.dirname)(prefixPath);
|
|
11732
12228
|
for (let page = startPage; page <= totalPages; page++) {
|
|
11733
12229
|
try {
|
|
11734
12230
|
await runCommand("pdftoppm", [
|
|
@@ -11742,9 +12238,9 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11742
12238
|
pdfPath,
|
|
11743
12239
|
prefixPath
|
|
11744
12240
|
]);
|
|
11745
|
-
const files = await (0,
|
|
12241
|
+
const files = await (0, import_promises4.readdir)(imagesDir);
|
|
11746
12242
|
const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11747
|
-
const imagePath = (0,
|
|
12243
|
+
const imagePath = (0, import_path6.join)(imagesDir, pageFiles[pageFiles.length - 1]);
|
|
11748
12244
|
yield { pageNumber: page, imagePath };
|
|
11749
12245
|
} catch (err) {
|
|
11750
12246
|
yield {
|
|
@@ -11757,7 +12253,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11757
12253
|
}
|
|
11758
12254
|
async function runCommand(cmd, args) {
|
|
11759
12255
|
await new Promise((resolvePromise, reject) => {
|
|
11760
|
-
const child = (0,
|
|
12256
|
+
const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
|
|
11761
12257
|
let stderr = "";
|
|
11762
12258
|
child.stderr.on("data", (d) => {
|
|
11763
12259
|
stderr += String(d);
|
|
@@ -11771,7 +12267,7 @@ async function runCommand(cmd, args) {
|
|
|
11771
12267
|
}
|
|
11772
12268
|
async function runCommandWithStdout(cmd, args) {
|
|
11773
12269
|
return await new Promise((resolvePromise, reject) => {
|
|
11774
|
-
const child = (0,
|
|
12270
|
+
const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
|
|
11775
12271
|
let stdout = "";
|
|
11776
12272
|
let stderr = "";
|
|
11777
12273
|
child.stdout.on("data", (d) => {
|
|
@@ -11787,13 +12283,6 @@ async function runCommandWithStdout(cmd, args) {
|
|
|
11787
12283
|
});
|
|
11788
12284
|
});
|
|
11789
12285
|
}
|
|
11790
|
-
async function assertSofficeAvailable() {
|
|
11791
|
-
try {
|
|
11792
|
-
await runCommand("soffice", ["--version"]);
|
|
11793
|
-
} catch {
|
|
11794
|
-
throw new UnifiedOcrError("SOFFICE_NOT_FOUND", "convert", "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.");
|
|
11795
|
-
}
|
|
11796
|
-
}
|
|
11797
12286
|
function naturalPageSort(a, b) {
|
|
11798
12287
|
const na = Number((a.match(/\d+/g) || ["0"]).at(-1) || 0);
|
|
11799
12288
|
const nb = Number((b.match(/\d+/g) || ["0"]).at(-1) || 0);
|
|
@@ -11867,7 +12356,7 @@ function startParallelProbeRuns(input) {
|
|
|
11867
12356
|
}
|
|
11868
12357
|
async function loadModelCache(path) {
|
|
11869
12358
|
try {
|
|
11870
|
-
const raw = await (0,
|
|
12359
|
+
const raw = await (0, import_promises4.readFile)(path, "utf-8");
|
|
11871
12360
|
return JSON.parse(raw);
|
|
11872
12361
|
} catch {
|
|
11873
12362
|
return null;
|
|
@@ -11898,15 +12387,15 @@ async function updateModelCache(path, probes) {
|
|
|
11898
12387
|
}
|
|
11899
12388
|
}
|
|
11900
12389
|
current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
11901
|
-
await (0,
|
|
12390
|
+
await (0, import_promises4.writeFile)(path, JSON.stringify(current, null, 2), "utf-8");
|
|
11902
12391
|
}
|
|
11903
12392
|
async function ocrWorkerPool(input) {
|
|
11904
|
-
const { queue, workerCount, ocrInput, onPageDone } = input;
|
|
12393
|
+
const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
|
|
11905
12394
|
const results = /* @__PURE__ */ new Map();
|
|
11906
12395
|
let completedCount = 0;
|
|
11907
12396
|
async function worker() {
|
|
11908
12397
|
while (true) {
|
|
11909
|
-
const item = await
|
|
12398
|
+
const item = await queue2.dequeue();
|
|
11910
12399
|
if (item === QUEUE_DONE) break;
|
|
11911
12400
|
const { pageNumber, imagePath, error } = item;
|
|
11912
12401
|
if (imagePath === null) {
|
|
@@ -11958,7 +12447,7 @@ async function ocrImageWithFallback(input) {
|
|
|
11958
12447
|
async function mergeMarkdownPages(paths) {
|
|
11959
12448
|
const out = [];
|
|
11960
12449
|
for (let i = 0; i < paths.length; i++) {
|
|
11961
|
-
const txt = (await (0,
|
|
12450
|
+
const txt = (await (0, import_promises4.readFile)(paths[i], "utf-8")).trim();
|
|
11962
12451
|
if (!txt) continue;
|
|
11963
12452
|
out.push(txt);
|
|
11964
12453
|
}
|
|
@@ -12074,7 +12563,7 @@ async function ocrImageViaNim(input) {
|
|
|
12074
12563
|
throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
|
|
12075
12564
|
}
|
|
12076
12565
|
async function encodeBase64(path) {
|
|
12077
|
-
const b = await (0,
|
|
12566
|
+
const b = await (0, import_promises4.readFile)(path);
|
|
12078
12567
|
return b.toString("base64");
|
|
12079
12568
|
}
|
|
12080
12569
|
function stripCodeFence3(text) {
|
|
@@ -12086,7 +12575,7 @@ async function delay(ms) {
|
|
|
12086
12575
|
await new Promise((resolvePromise) => setTimeout(resolvePromise, ms));
|
|
12087
12576
|
}
|
|
12088
12577
|
function ensureSupportedInput(path) {
|
|
12089
|
-
const ext = (0,
|
|
12578
|
+
const ext = (0, import_path6.extname)(path).toLowerCase();
|
|
12090
12579
|
const allowed = /* @__PURE__ */ new Set([".pdf", ".hwp", ".hwpx", ".docx", ".xlsx"]);
|
|
12091
12580
|
if (!allowed.has(ext)) {
|
|
12092
12581
|
throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uC785\uB825 \uD3EC\uB9F7: ${ext}`);
|
|
@@ -12113,7 +12602,7 @@ async function parse2(input, options) {
|
|
|
12113
12602
|
let buffer;
|
|
12114
12603
|
if (typeof input === "string") {
|
|
12115
12604
|
try {
|
|
12116
|
-
const buf = await (0,
|
|
12605
|
+
const buf = await (0, import_promises5.readFile)(input);
|
|
12117
12606
|
buffer = toArrayBuffer(buf);
|
|
12118
12607
|
} catch (err) {
|
|
12119
12608
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
@@ -12273,6 +12762,9 @@ async function parseDocx(buffer, options, zip) {
|
|
|
12273
12762
|
VERSION,
|
|
12274
12763
|
blocksToMarkdown,
|
|
12275
12764
|
compare,
|
|
12765
|
+
convertHwpToPdf,
|
|
12766
|
+
convertHwpxToPdf,
|
|
12767
|
+
convertToPdf,
|
|
12276
12768
|
detectFormat,
|
|
12277
12769
|
detectZipFormat,
|
|
12278
12770
|
diffBlocks,
|