@clazic/kordoc 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TS3F57LY.js → chunk-3FTA6V7S.js} +394 -67
- package/dist/chunk-3FTA6V7S.js.map +1 -0
- package/dist/{chunk-TND4YFBV.js → chunk-USE7IDLV.js} +2 -2
- package/dist/cli.js +46 -11
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +551 -231
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -9
- package/dist/index.d.ts +71 -9
- package/dist/index.js +537 -217
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-F66K7PXH.js → utils-XYBJBWM2.js} +2 -2
- package/dist/{watch-2S5ULHAM.js → watch-CJRS6OYE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-TS3F57LY.js.map +0 -1
- /package/dist/{chunk-TND4YFBV.js.map → chunk-USE7IDLV.js.map} +0 -0
- /package/dist/{utils-F66K7PXH.js.map → utils-XYBJBWM2.js.map} +0 -0
- /package/dist/{watch-2S5ULHAM.js.map → watch-CJRS6OYE.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -37,118 +37,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
37
37
|
mod
|
|
38
38
|
));
|
|
39
39
|
|
|
40
|
-
// src/utils.ts
|
|
41
|
-
var utils_exports = {};
|
|
42
|
-
__export(utils_exports, {
|
|
43
|
-
KordocError: () => KordocError,
|
|
44
|
-
VERSION: () => VERSION,
|
|
45
|
-
classifyError: () => classifyError,
|
|
46
|
-
isPathTraversal: () => isPathTraversal,
|
|
47
|
-
normalizeKordocError: () => normalizeKordocError,
|
|
48
|
-
precheckZipSize: () => precheckZipSize,
|
|
49
|
-
sanitizeError: () => sanitizeError,
|
|
50
|
-
sanitizeHref: () => sanitizeHref,
|
|
51
|
-
toArrayBuffer: () => toArrayBuffer
|
|
52
|
-
});
|
|
53
|
-
function toArrayBuffer(buf) {
|
|
54
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
55
|
-
return buf.buffer;
|
|
56
|
-
}
|
|
57
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
58
|
-
}
|
|
59
|
-
function sanitizeError(err) {
|
|
60
|
-
if (err instanceof KordocError) return err.message;
|
|
61
|
-
return "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
|
|
62
|
-
}
|
|
63
|
-
function isPathTraversal(name) {
|
|
64
|
-
if (name.includes("\0")) return true;
|
|
65
|
-
const normalized = name.replace(/\\/g, "/");
|
|
66
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
67
|
-
}
|
|
68
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
69
|
-
try {
|
|
70
|
-
const data = new DataView(buffer);
|
|
71
|
-
const len = buffer.byteLength;
|
|
72
|
-
let eocdOffset = -1;
|
|
73
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
74
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
75
|
-
eocdOffset = i;
|
|
76
|
-
break;
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
80
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
81
|
-
if (entryCount > maxEntries) {
|
|
82
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
83
|
-
}
|
|
84
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
85
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
86
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
87
|
-
let totalUncompressed = 0;
|
|
88
|
-
let pos = cdOffset;
|
|
89
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
90
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
91
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
92
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
93
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
94
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
95
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
96
|
-
}
|
|
97
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
98
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
99
|
-
}
|
|
100
|
-
return { totalUncompressed, entryCount };
|
|
101
|
-
} catch (err) {
|
|
102
|
-
if (err instanceof KordocError) throw err;
|
|
103
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
function sanitizeHref(href) {
|
|
107
|
-
const trimmed = href.trim();
|
|
108
|
-
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
109
|
-
return trimmed;
|
|
110
|
-
}
|
|
111
|
-
function classifyError(err) {
|
|
112
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
113
|
-
const msg = err.message;
|
|
114
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
115
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
116
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
117
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
118
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
119
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
120
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
121
|
-
return "PARSE_ERROR";
|
|
122
|
-
}
|
|
123
|
-
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
|
|
124
|
-
if (err instanceof KordocError) {
|
|
125
|
-
if (!err.stage) err.stage = stage;
|
|
126
|
-
if (!err.code) err.code = fallbackCode;
|
|
127
|
-
return err;
|
|
128
|
-
}
|
|
129
|
-
const message = err instanceof Error ? err.message : fallbackMessage;
|
|
130
|
-
const code = err instanceof Error ? classifyError(err) : fallbackCode;
|
|
131
|
-
return new KordocError(message || fallbackMessage, { code, stage });
|
|
132
|
-
}
|
|
133
|
-
var VERSION, KordocError, SAFE_HREF_RE;
|
|
134
|
-
var init_utils = __esm({
|
|
135
|
-
"src/utils.ts"() {
|
|
136
|
-
"use strict";
|
|
137
|
-
VERSION = true ? "2.5.2" : "0.0.0-dev";
|
|
138
|
-
KordocError = class extends Error {
|
|
139
|
-
code;
|
|
140
|
-
stage;
|
|
141
|
-
constructor(message, opts = {}) {
|
|
142
|
-
super(message);
|
|
143
|
-
this.name = "KordocError";
|
|
144
|
-
this.code = opts.code;
|
|
145
|
-
this.stage = opts.stage;
|
|
146
|
-
}
|
|
147
|
-
};
|
|
148
|
-
SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
149
|
-
}
|
|
150
|
-
});
|
|
151
|
-
|
|
152
40
|
// src/page-range.ts
|
|
153
41
|
var page_range_exports = {};
|
|
154
42
|
__export(page_range_exports, {
|
|
@@ -3223,8 +3111,97 @@ async function detectZipFormat(buffer) {
|
|
|
3223
3111
|
import JSZip2 from "jszip";
|
|
3224
3112
|
import { DOMParser } from "@xmldom/xmldom";
|
|
3225
3113
|
|
|
3114
|
+
// src/utils.ts
|
|
3115
|
+
var VERSION = true ? "2.6.1" : "0.0.0-dev";
|
|
3116
|
+
function toArrayBuffer(buf) {
|
|
3117
|
+
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3118
|
+
return buf.buffer;
|
|
3119
|
+
}
|
|
3120
|
+
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
3121
|
+
}
|
|
3122
|
+
var KordocError = class extends Error {
|
|
3123
|
+
code;
|
|
3124
|
+
stage;
|
|
3125
|
+
constructor(message, opts = {}) {
|
|
3126
|
+
super(message);
|
|
3127
|
+
this.name = "KordocError";
|
|
3128
|
+
this.code = opts.code;
|
|
3129
|
+
this.stage = opts.stage;
|
|
3130
|
+
}
|
|
3131
|
+
};
|
|
3132
|
+
function isPathTraversal(name) {
|
|
3133
|
+
if (name.includes("\0")) return true;
|
|
3134
|
+
const normalized = name.replace(/\\/g, "/");
|
|
3135
|
+
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
3136
|
+
}
|
|
3137
|
+
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
3138
|
+
try {
|
|
3139
|
+
const data = new DataView(buffer);
|
|
3140
|
+
const len = buffer.byteLength;
|
|
3141
|
+
let eocdOffset = -1;
|
|
3142
|
+
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
3143
|
+
if (data.getUint32(i, true) === 101010256) {
|
|
3144
|
+
eocdOffset = i;
|
|
3145
|
+
break;
|
|
3146
|
+
}
|
|
3147
|
+
}
|
|
3148
|
+
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
3149
|
+
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
3150
|
+
if (entryCount > maxEntries) {
|
|
3151
|
+
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
3152
|
+
}
|
|
3153
|
+
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
3154
|
+
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
3155
|
+
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
3156
|
+
let totalUncompressed = 0;
|
|
3157
|
+
let pos = cdOffset;
|
|
3158
|
+
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
3159
|
+
if (data.getUint32(pos, true) !== 33639248) break;
|
|
3160
|
+
totalUncompressed += data.getUint32(pos + 24, true);
|
|
3161
|
+
const nameLen = data.getUint16(pos + 28, true);
|
|
3162
|
+
const extraLen = data.getUint16(pos + 30, true);
|
|
3163
|
+
const commentLen = data.getUint16(pos + 32, true);
|
|
3164
|
+
pos += 46 + nameLen + extraLen + commentLen;
|
|
3165
|
+
}
|
|
3166
|
+
if (totalUncompressed > maxUncompressedSize) {
|
|
3167
|
+
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
3168
|
+
}
|
|
3169
|
+
return { totalUncompressed, entryCount };
|
|
3170
|
+
} catch (err) {
|
|
3171
|
+
if (err instanceof KordocError) throw err;
|
|
3172
|
+
return { totalUncompressed: 0, entryCount: 0 };
|
|
3173
|
+
}
|
|
3174
|
+
}
|
|
3175
|
+
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
3176
|
+
function sanitizeHref(href) {
|
|
3177
|
+
const trimmed = href.trim();
|
|
3178
|
+
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
3179
|
+
return trimmed;
|
|
3180
|
+
}
|
|
3181
|
+
function classifyError(err) {
|
|
3182
|
+
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
3183
|
+
const msg = err.message;
|
|
3184
|
+
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
3185
|
+
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
3186
|
+
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
3187
|
+
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
3188
|
+
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
3189
|
+
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
3190
|
+
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
3191
|
+
return "PARSE_ERROR";
|
|
3192
|
+
}
|
|
3193
|
+
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
|
|
3194
|
+
if (err instanceof KordocError) {
|
|
3195
|
+
if (!err.stage) err.stage = stage;
|
|
3196
|
+
if (!err.code) err.code = fallbackCode;
|
|
3197
|
+
return err;
|
|
3198
|
+
}
|
|
3199
|
+
const message = err instanceof Error ? err.message : fallbackMessage;
|
|
3200
|
+
const code = err instanceof Error ? classifyError(err) : fallbackCode;
|
|
3201
|
+
return new KordocError(message || fallbackMessage, { code, stage });
|
|
3202
|
+
}
|
|
3203
|
+
|
|
3226
3204
|
// src/table/builder.ts
|
|
3227
|
-
init_utils();
|
|
3228
3205
|
var MAX_COLS = 200;
|
|
3229
3206
|
var MAX_ROWS = 1e4;
|
|
3230
3207
|
function buildTable(rows) {
|
|
@@ -3484,8 +3461,6 @@ var HEADING_RATIO_H2 = 1.3;
|
|
|
3484
3461
|
var HEADING_RATIO_H3 = 1.15;
|
|
3485
3462
|
|
|
3486
3463
|
// src/hwpx/parser.ts
|
|
3487
|
-
init_utils();
|
|
3488
|
-
init_utils();
|
|
3489
3464
|
init_page_range();
|
|
3490
3465
|
init_logger();
|
|
3491
3466
|
var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
|
|
@@ -4327,7 +4302,6 @@ function extractTextFromNode(node) {
|
|
|
4327
4302
|
}
|
|
4328
4303
|
|
|
4329
4304
|
// src/hwp5/record.ts
|
|
4330
|
-
init_utils();
|
|
4331
4305
|
import { inflateRawSync, inflateSync } from "zlib";
|
|
4332
4306
|
var TAG_PARA_HEADER = 66;
|
|
4333
4307
|
var TAG_PARA_TEXT = 67;
|
|
@@ -5378,7 +5352,6 @@ function parseLenientCfb(data) {
|
|
|
5378
5352
|
}
|
|
5379
5353
|
|
|
5380
5354
|
// src/hwp5/parser.ts
|
|
5381
|
-
init_utils();
|
|
5382
5355
|
init_page_range();
|
|
5383
5356
|
init_logger();
|
|
5384
5357
|
var CFB = __toESM(require_cfb(), 1);
|
|
@@ -6034,7 +6007,6 @@ function arrangeCells(rows, cols, cells) {
|
|
|
6034
6007
|
}
|
|
6035
6008
|
|
|
6036
6009
|
// src/pdf/parser.ts
|
|
6037
|
-
init_utils();
|
|
6038
6010
|
init_page_range();
|
|
6039
6011
|
import { createRequire } from "module";
|
|
6040
6012
|
import { dirname as dirname2, join as join3, resolve as resolve2 } from "path";
|
|
@@ -7926,7 +7898,6 @@ function mergeKoreanLines(text) {
|
|
|
7926
7898
|
}
|
|
7927
7899
|
|
|
7928
7900
|
// src/xlsx/parser.ts
|
|
7929
|
-
init_utils();
|
|
7930
7901
|
import JSZip3 from "jszip";
|
|
7931
7902
|
import { DOMParser as DOMParser2 } from "@xmldom/xmldom";
|
|
7932
7903
|
init_logger();
|
|
@@ -8255,7 +8226,6 @@ async function parseXlsxDocument(buffer, options, existingZip) {
|
|
|
8255
8226
|
}
|
|
8256
8227
|
|
|
8257
8228
|
// src/docx/parser.ts
|
|
8258
|
-
init_utils();
|
|
8259
8229
|
import JSZip4 from "jszip";
|
|
8260
8230
|
import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
|
|
8261
8231
|
init_logger();
|
|
@@ -8737,7 +8707,6 @@ async function parseDocxDocument(buffer, options, existingZip) {
|
|
|
8737
8707
|
}
|
|
8738
8708
|
|
|
8739
8709
|
// src/index.ts
|
|
8740
|
-
init_utils();
|
|
8741
8710
|
init_cli_provider();
|
|
8742
8711
|
init_markdown_to_blocks();
|
|
8743
8712
|
init_logger();
|
|
@@ -11241,7 +11210,6 @@ async function markdownToXlsx(markdown, options) {
|
|
|
11241
11210
|
|
|
11242
11211
|
// src/convert/index.ts
|
|
11243
11212
|
import { readFile } from "fs/promises";
|
|
11244
|
-
init_utils();
|
|
11245
11213
|
|
|
11246
11214
|
// src/convert/libreoffice.ts
|
|
11247
11215
|
import libre from "libreoffice-convert";
|
|
@@ -11255,19 +11223,250 @@ var ConvertError = class extends Error {
|
|
|
11255
11223
|
}
|
|
11256
11224
|
};
|
|
11257
11225
|
|
|
11258
|
-
// src/convert/
|
|
11259
|
-
|
|
11260
|
-
|
|
11261
|
-
|
|
11226
|
+
// src/convert/installer.ts
|
|
11227
|
+
import { homedir } from "os";
|
|
11228
|
+
import { join as join4, delimiter } from "path";
|
|
11229
|
+
import { mkdir, access, symlink, rm } from "fs/promises";
|
|
11230
|
+
import { createWriteStream } from "fs";
|
|
11231
|
+
import { spawn as spawn2 } from "child_process";
|
|
11232
|
+
var installInFlight = null;
|
|
11233
|
+
var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");
|
|
11234
|
+
var VERSION_FILE = join4(CACHE_DIR, "version");
|
|
11235
|
+
var PACKAGES = {
|
|
11236
|
+
darwin: {
|
|
11237
|
+
url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
|
|
11238
|
+
binPath: "LibreOffice.app/Contents/MacOS/soffice",
|
|
11239
|
+
sizeMb: 300
|
|
11240
|
+
},
|
|
11241
|
+
linux: {
|
|
11242
|
+
url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
|
|
11243
|
+
binPath: "opt/libreoffice24.8/program/soffice",
|
|
11244
|
+
sizeMb: 200
|
|
11245
|
+
},
|
|
11246
|
+
win32: {
|
|
11247
|
+
url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
|
|
11248
|
+
binPath: "LibreOffice/program/soffice.exe",
|
|
11249
|
+
sizeMb: 350
|
|
11250
|
+
}
|
|
11251
|
+
};
|
|
11252
|
+
async function findInPath() {
|
|
11253
|
+
return new Promise((resolve4) => {
|
|
11254
|
+
const child = spawn2("soffice", ["--version"], { stdio: "ignore" });
|
|
11255
|
+
child.on("close", (code) => resolve4(code === 0 ? "soffice" : null));
|
|
11256
|
+
child.on("error", () => resolve4(null));
|
|
11257
|
+
});
|
|
11258
|
+
}
|
|
11259
|
+
async function findInCache() {
|
|
11260
|
+
const cachedBin = join4(CACHE_DIR, "bin", "soffice");
|
|
11261
|
+
try {
|
|
11262
|
+
await access(cachedBin);
|
|
11263
|
+
return cachedBin;
|
|
11264
|
+
} catch {
|
|
11265
|
+
return null;
|
|
11266
|
+
}
|
|
11267
|
+
}
|
|
11268
|
+
async function findInDefaultPaths() {
|
|
11269
|
+
const platform = process.platform;
|
|
11270
|
+
const paths = [];
|
|
11271
|
+
if (platform === "darwin") {
|
|
11272
|
+
paths.push(
|
|
11273
|
+
"/Applications/LibreOffice.app/Contents/MacOS/soffice",
|
|
11274
|
+
"/opt/homebrew/bin/soffice",
|
|
11275
|
+
"/usr/local/bin/soffice"
|
|
11276
|
+
);
|
|
11277
|
+
} else if (platform === "linux") {
|
|
11278
|
+
paths.push(
|
|
11279
|
+
"/usr/bin/soffice",
|
|
11280
|
+
"/usr/lib/libreoffice/program/soffice"
|
|
11281
|
+
);
|
|
11282
|
+
} else if (platform === "win32") {
|
|
11283
|
+
const pf = process.env["ProgramFiles"] ?? "C:\\Program Files";
|
|
11284
|
+
const pf86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
|
|
11285
|
+
paths.push(
|
|
11286
|
+
join4(pf, "LibreOffice", "program", "soffice.exe"),
|
|
11287
|
+
join4(pf86, "LibreOffice", "program", "soffice.exe")
|
|
11288
|
+
);
|
|
11289
|
+
}
|
|
11290
|
+
for (const p of paths) {
|
|
11291
|
+
try {
|
|
11292
|
+
await access(p);
|
|
11293
|
+
return p;
|
|
11294
|
+
} catch {
|
|
11295
|
+
continue;
|
|
11296
|
+
}
|
|
11297
|
+
}
|
|
11298
|
+
return null;
|
|
11299
|
+
}
|
|
11300
|
+
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
|
|
11301
|
+
const response = await fetch(url);
|
|
11302
|
+
if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
|
|
11303
|
+
const file = createWriteStream(dest);
|
|
11304
|
+
const reader = response.body.getReader();
|
|
11305
|
+
let downloaded = 0;
|
|
11306
|
+
try {
|
|
11307
|
+
while (true) {
|
|
11308
|
+
const { done, value } = await reader.read();
|
|
11309
|
+
if (done) break;
|
|
11310
|
+
file.write(value);
|
|
11311
|
+
downloaded += value.length;
|
|
11312
|
+
onProgress?.(downloaded, totalBytes);
|
|
11313
|
+
}
|
|
11314
|
+
} finally {
|
|
11315
|
+
file.end();
|
|
11316
|
+
reader.releaseLock();
|
|
11317
|
+
}
|
|
11318
|
+
}
|
|
11319
|
+
async function installForPlatform(pkg, onProgress) {
|
|
11320
|
+
const platform = process.platform;
|
|
11321
|
+
await mkdir(CACHE_DIR, { recursive: true });
|
|
11322
|
+
const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
|
|
11323
|
+
await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
|
|
11324
|
+
try {
|
|
11325
|
+
if (platform === "darwin") {
|
|
11326
|
+
return await installMacOS(pkg, downloadPath);
|
|
11327
|
+
} else if (platform === "linux") {
|
|
11328
|
+
return await installLinux(pkg, downloadPath);
|
|
11329
|
+
} else if (platform === "win32") {
|
|
11330
|
+
return await installWindows(pkg, downloadPath);
|
|
11331
|
+
}
|
|
11332
|
+
} catch (err) {
|
|
11333
|
+
await rm(downloadPath, { force: true });
|
|
11334
|
+
throw err;
|
|
11335
|
+
}
|
|
11336
|
+
throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
|
|
11337
|
+
}
|
|
11338
|
+
async function installMacOS(pkg, downloadPath) {
|
|
11339
|
+
const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
|
|
11340
|
+
await new Promise((resolve4, reject) => {
|
|
11341
|
+
const child = spawn2("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath]);
|
|
11342
|
+
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328")));
|
|
11343
|
+
});
|
|
11344
|
+
try {
|
|
11345
|
+
const appSource = join4(mountPoint, "LibreOffice.app");
|
|
11346
|
+
const appDest = join4(CACHE_DIR, "LibreOffice.app");
|
|
11347
|
+
await new Promise((resolve4, reject) => {
|
|
11348
|
+
const child = spawn2("cp", ["-R", appSource, appDest]);
|
|
11349
|
+
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(".app \uBCF5\uC0AC \uC2E4\uD328")));
|
|
11350
|
+
});
|
|
11351
|
+
} finally {
|
|
11352
|
+
await new Promise((resolve4) => {
|
|
11353
|
+
const child = spawn2("hdiutil", ["detach", mountPoint]);
|
|
11354
|
+
child.on("close", () => resolve4());
|
|
11355
|
+
});
|
|
11356
|
+
}
|
|
11357
|
+
await rm(downloadPath, { force: true });
|
|
11358
|
+
return await createSymlink(join4(CACHE_DIR, pkg.binPath));
|
|
11359
|
+
}
|
|
11360
|
+
async function installLinux(pkg, downloadPath) {
|
|
11361
|
+
const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
|
|
11362
|
+
await mkdir(extractDir, { recursive: true });
|
|
11363
|
+
await new Promise((resolve4, reject) => {
|
|
11364
|
+
const child = spawn2("tar", ["xzf", downloadPath, "-C", extractDir]);
|
|
11365
|
+
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328")));
|
|
11366
|
+
});
|
|
11367
|
+
const debsDir = join4(extractDir, "DEBS");
|
|
11262
11368
|
try {
|
|
11263
|
-
await
|
|
11369
|
+
await access(debsDir);
|
|
11370
|
+
const entries = await (await import("fs/promises")).readdir(debsDir);
|
|
11371
|
+
for (const entry of entries) {
|
|
11372
|
+
if (entry.endsWith(".deb")) {
|
|
11373
|
+
await new Promise((resolve4, reject) => {
|
|
11374
|
+
const child = spawn2("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR]);
|
|
11375
|
+
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`${entry} \uCD94\uCD9C \uC2E4\uD328`)));
|
|
11376
|
+
});
|
|
11377
|
+
}
|
|
11378
|
+
}
|
|
11264
11379
|
} catch {
|
|
11380
|
+
}
|
|
11381
|
+
await rm(downloadPath, { force: true });
|
|
11382
|
+
await rm(extractDir, { recursive: true, force: true });
|
|
11383
|
+
return await createSymlink(join4(CACHE_DIR, pkg.binPath));
|
|
11384
|
+
}
|
|
11385
|
+
async function installWindows(pkg, downloadPath) {
|
|
11386
|
+
await new Promise((resolve4, reject) => {
|
|
11387
|
+
const child = spawn2("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`]);
|
|
11388
|
+
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
|
|
11389
|
+
});
|
|
11390
|
+
await rm(downloadPath, { force: true });
|
|
11391
|
+
return join4(CACHE_DIR, pkg.binPath);
|
|
11392
|
+
}
|
|
11393
|
+
async function createSymlink(actualBin) {
|
|
11394
|
+
const binDir = join4(CACHE_DIR, "bin");
|
|
11395
|
+
await mkdir(binDir, { recursive: true });
|
|
11396
|
+
const linkBin = join4(binDir, "soffice");
|
|
11397
|
+
try {
|
|
11398
|
+
await symlink(actualBin, linkBin);
|
|
11399
|
+
} catch {
|
|
11400
|
+
}
|
|
11401
|
+
process.env.PATH = `${binDir}${delimiter}${process.env.PATH}`;
|
|
11402
|
+
return linkBin;
|
|
11403
|
+
}
|
|
11404
|
+
async function installLibreOffice(onProgress) {
|
|
11405
|
+
const platform = process.platform;
|
|
11406
|
+
const pkg = PACKAGES[platform];
|
|
11407
|
+
if (!pkg) {
|
|
11265
11408
|
throw new ConvertError(
|
|
11409
|
+
"UNSUPPORTED_PLATFORM",
|
|
11410
|
+
`${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
|
|
11411
|
+
);
|
|
11412
|
+
}
|
|
11413
|
+
return await installForPlatform(pkg, onProgress);
|
|
11414
|
+
}
|
|
11415
|
+
async function resolveSoffice(emitter, autoInstall = true) {
|
|
11416
|
+
emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
|
|
11417
|
+
const inPath = await findInPath();
|
|
11418
|
+
if (inPath) {
|
|
11419
|
+
emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
|
|
11420
|
+
return inPath;
|
|
11421
|
+
}
|
|
11422
|
+
const inCache = await findInCache();
|
|
11423
|
+
if (inCache) {
|
|
11424
|
+
emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
|
|
11425
|
+
return inCache;
|
|
11426
|
+
}
|
|
11427
|
+
const inDefault = await findInDefaultPaths();
|
|
11428
|
+
if (inDefault) {
|
|
11429
|
+
emitter.validate("soffice_found", "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inDefault });
|
|
11430
|
+
return inDefault;
|
|
11431
|
+
}
|
|
11432
|
+
if (!autoInstall) {
|
|
11433
|
+
emitter.error(
|
|
11434
|
+
"validate",
|
|
11266
11435
|
"SOFFICE_NOT_FOUND",
|
|
11267
|
-
"
|
|
11436
|
+
"LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
|
|
11437
|
+
"\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
|
|
11268
11438
|
);
|
|
11439
|
+
throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
|
|
11440
|
+
}
|
|
11441
|
+
if (installInFlight) {
|
|
11442
|
+
return installInFlight;
|
|
11269
11443
|
}
|
|
11444
|
+
emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
|
|
11445
|
+
installInFlight = (async () => {
|
|
11446
|
+
try {
|
|
11447
|
+
const installed = await installLibreOffice((downloaded, total) => {
|
|
11448
|
+
const percent = Math.round(downloaded / total * 100);
|
|
11449
|
+
emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
|
|
11450
|
+
percent,
|
|
11451
|
+
downloadedBytes: downloaded,
|
|
11452
|
+
totalBytes: total
|
|
11453
|
+
});
|
|
11454
|
+
});
|
|
11455
|
+
emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
|
|
11456
|
+
return installed;
|
|
11457
|
+
} catch (err) {
|
|
11458
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
11459
|
+
emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
|
|
11460
|
+
throw err;
|
|
11461
|
+
} finally {
|
|
11462
|
+
installInFlight = null;
|
|
11463
|
+
}
|
|
11464
|
+
})();
|
|
11465
|
+
return installInFlight;
|
|
11270
11466
|
}
|
|
11467
|
+
|
|
11468
|
+
// src/convert/libreoffice.ts
|
|
11469
|
+
var libreConvert = libre.convert;
|
|
11271
11470
|
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
11272
11471
|
return new Promise((resolve4, reject) => {
|
|
11273
11472
|
const timer = setTimeout(() => {
|
|
@@ -11291,6 +11490,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
|
11291
11490
|
});
|
|
11292
11491
|
}
|
|
11293
11492
|
|
|
11493
|
+
// src/convert/events.ts
|
|
11494
|
+
var ConvertEventEmitter = class {
|
|
11495
|
+
listener = null;
|
|
11496
|
+
/** 이벤트 리스너 등록 */
|
|
11497
|
+
setListener(listener) {
|
|
11498
|
+
this.listener = listener;
|
|
11499
|
+
}
|
|
11500
|
+
/** 이벤트 발송 */
|
|
11501
|
+
emit(event) {
|
|
11502
|
+
try {
|
|
11503
|
+
this.listener?.(event);
|
|
11504
|
+
} catch {
|
|
11505
|
+
}
|
|
11506
|
+
}
|
|
11507
|
+
/** 타입 안전한 헬퍼: detect 이벤트 */
|
|
11508
|
+
detect(stage, message, meta) {
|
|
11509
|
+
this.emit({ type: "detect", stage, message, ...meta });
|
|
11510
|
+
}
|
|
11511
|
+
/** 타입 안전한 헬퍼: validate 이벤트 */
|
|
11512
|
+
validate(stage, message, meta) {
|
|
11513
|
+
this.emit({ type: "validate", stage, message, ...meta });
|
|
11514
|
+
}
|
|
11515
|
+
/** 타입 안전한 헬퍼: install 이벤트 */
|
|
11516
|
+
install(stage, message, meta) {
|
|
11517
|
+
this.emit({ type: "install", stage, message, ...meta });
|
|
11518
|
+
}
|
|
11519
|
+
/** 타입 안전한 헬퍼: convert 진행 이벤트 */
|
|
11520
|
+
progress(percent, message) {
|
|
11521
|
+
this.emit({ type: "convert", stage: "convert_progress", message, percent });
|
|
11522
|
+
}
|
|
11523
|
+
/** 타입 안전한 헬퍼: convert 시작 */
|
|
11524
|
+
convertStart(message) {
|
|
11525
|
+
this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
|
|
11526
|
+
}
|
|
11527
|
+
/** 타입 안전한 헬퍼: convert 완료 */
|
|
11528
|
+
convertDone(message) {
|
|
11529
|
+
this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
|
|
11530
|
+
}
|
|
11531
|
+
/** 타입 안전한 헬퍼: 완료 이벤트 */
|
|
11532
|
+
complete(result) {
|
|
11533
|
+
this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
|
|
11534
|
+
}
|
|
11535
|
+
/** 타입 안전한 헬퍼: 에러 이벤트 */
|
|
11536
|
+
error(stage, code, message, suggestion) {
|
|
11537
|
+
this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
|
|
11538
|
+
}
|
|
11539
|
+
};
|
|
11540
|
+
|
|
11294
11541
|
// src/convert/index.ts
|
|
11295
11542
|
var isConverting = false;
|
|
11296
11543
|
var queue = [];
|
|
@@ -11315,81 +11562,129 @@ async function acquireConvertLock() {
|
|
|
11315
11562
|
});
|
|
11316
11563
|
}
|
|
11317
11564
|
async function convertToPdf(input, options) {
|
|
11318
|
-
|
|
11319
|
-
|
|
11320
|
-
|
|
11321
|
-
buffer = await readFile(input);
|
|
11322
|
-
} else if (Buffer.isBuffer(input)) {
|
|
11323
|
-
buffer = input;
|
|
11324
|
-
} else {
|
|
11325
|
-
buffer = Buffer.from(input);
|
|
11326
|
-
}
|
|
11327
|
-
} catch (err) {
|
|
11328
|
-
return {
|
|
11329
|
-
success: false,
|
|
11330
|
-
code: "PARSE_ERROR",
|
|
11331
|
-
error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
|
|
11332
|
-
stage: "detect"
|
|
11333
|
-
};
|
|
11565
|
+
const emitter = new ConvertEventEmitter();
|
|
11566
|
+
if (options?.onEvent) {
|
|
11567
|
+
emitter.setListener(options.onEvent);
|
|
11334
11568
|
}
|
|
11335
|
-
|
|
11336
|
-
|
|
11337
|
-
|
|
11338
|
-
|
|
11339
|
-
|
|
11340
|
-
|
|
11341
|
-
|
|
11342
|
-
};
|
|
11343
|
-
}
|
|
11344
|
-
const format = detectFormat(toArrayBuffer(buffer));
|
|
11345
|
-
if (format !== "hwp" && format !== "hwpx") {
|
|
11346
|
-
return {
|
|
11347
|
-
success: false,
|
|
11348
|
-
code: "UNSUPPORTED_FORMAT",
|
|
11349
|
-
error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
|
|
11350
|
-
stage: "detect"
|
|
11351
|
-
};
|
|
11569
|
+
if (options?.onProgress) {
|
|
11570
|
+
const legacyProgress = options.onProgress;
|
|
11571
|
+
emitter.setListener((event) => {
|
|
11572
|
+
if (event.type === "convert" && event.stage === "convert_progress") {
|
|
11573
|
+
legacyProgress(event.percent, event.message);
|
|
11574
|
+
}
|
|
11575
|
+
});
|
|
11352
11576
|
}
|
|
11353
11577
|
try {
|
|
11354
|
-
|
|
11355
|
-
|
|
11356
|
-
|
|
11578
|
+
emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
|
|
11579
|
+
let buffer;
|
|
11580
|
+
try {
|
|
11581
|
+
if (typeof input === "string") {
|
|
11582
|
+
buffer = await readFile(input);
|
|
11583
|
+
} else if (Buffer.isBuffer(input)) {
|
|
11584
|
+
buffer = input;
|
|
11585
|
+
} else {
|
|
11586
|
+
buffer = Buffer.from(input);
|
|
11587
|
+
}
|
|
11588
|
+
} catch (err) {
|
|
11589
|
+
emitter.error(
|
|
11590
|
+
"detect",
|
|
11591
|
+
"PARSE_ERROR",
|
|
11592
|
+
`\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
|
|
11593
|
+
);
|
|
11357
11594
|
return {
|
|
11358
11595
|
success: false,
|
|
11359
|
-
code:
|
|
11360
|
-
error: err.message
|
|
11361
|
-
stage: "
|
|
11596
|
+
code: "PARSE_ERROR",
|
|
11597
|
+
error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
|
|
11598
|
+
stage: "detect"
|
|
11362
11599
|
};
|
|
11363
11600
|
}
|
|
11364
|
-
|
|
11365
|
-
|
|
11366
|
-
|
|
11367
|
-
|
|
11368
|
-
|
|
11369
|
-
|
|
11370
|
-
|
|
11371
|
-
return {
|
|
11372
|
-
success: true,
|
|
11373
|
-
pdf: new Uint8Array(pdf),
|
|
11374
|
-
sourceFormat: format
|
|
11375
|
-
};
|
|
11376
|
-
} catch (err) {
|
|
11377
|
-
if (err instanceof ConvertError) {
|
|
11601
|
+
const MAX_FILE_SIZE = 500 * 1024 * 1024;
|
|
11602
|
+
if (buffer.length > MAX_FILE_SIZE) {
|
|
11603
|
+
emitter.error(
|
|
11604
|
+
"detect",
|
|
11605
|
+
"FILE_TOO_LARGE",
|
|
11606
|
+
`\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
|
|
11607
|
+
);
|
|
11378
11608
|
return {
|
|
11379
11609
|
success: false,
|
|
11380
|
-
code:
|
|
11381
|
-
error:
|
|
11610
|
+
code: "FILE_TOO_LARGE",
|
|
11611
|
+
error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
|
|
11612
|
+
stage: "detect"
|
|
11613
|
+
};
|
|
11614
|
+
}
|
|
11615
|
+
const format = detectFormat(toArrayBuffer(buffer));
|
|
11616
|
+
emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
|
|
11617
|
+
if (format !== "hwp" && format !== "hwpx") {
|
|
11618
|
+
emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
|
|
11619
|
+
return {
|
|
11620
|
+
success: false,
|
|
11621
|
+
code: "UNSUPPORTED_FORMAT",
|
|
11622
|
+
error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
|
|
11623
|
+
stage: "detect"
|
|
11624
|
+
};
|
|
11625
|
+
}
|
|
11626
|
+
emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
|
|
11627
|
+
let sofficePath;
|
|
11628
|
+
try {
|
|
11629
|
+
sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
|
|
11630
|
+
} catch (err) {
|
|
11631
|
+
if (err instanceof ConvertError) {
|
|
11632
|
+
return {
|
|
11633
|
+
success: false,
|
|
11634
|
+
code: err.code,
|
|
11635
|
+
error: err.message,
|
|
11636
|
+
stage: "validate"
|
|
11637
|
+
};
|
|
11638
|
+
}
|
|
11639
|
+
throw err;
|
|
11640
|
+
}
|
|
11641
|
+
const releaseLock = await acquireConvertLock();
|
|
11642
|
+
try {
|
|
11643
|
+
emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
|
|
11644
|
+
emitter.progress(10, "\uBCC0\uD658 \uC911...");
|
|
11645
|
+
const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
|
|
11646
|
+
emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
|
|
11647
|
+
emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
|
|
11648
|
+
const result = {
|
|
11649
|
+
success: true,
|
|
11650
|
+
pdf: new Uint8Array(pdf),
|
|
11651
|
+
sourceFormat: format
|
|
11652
|
+
};
|
|
11653
|
+
emitter.complete({
|
|
11654
|
+
sourceFormat: format,
|
|
11655
|
+
pdfSize: pdf.length
|
|
11656
|
+
});
|
|
11657
|
+
return result;
|
|
11658
|
+
} catch (err) {
|
|
11659
|
+
if (err instanceof ConvertError) {
|
|
11660
|
+
emitter.error("convert", err.code, err.message);
|
|
11661
|
+
return {
|
|
11662
|
+
success: false,
|
|
11663
|
+
code: err.code,
|
|
11664
|
+
error: err.message,
|
|
11665
|
+
stage: "convert"
|
|
11666
|
+
};
|
|
11667
|
+
}
|
|
11668
|
+
const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
|
|
11669
|
+
emitter.error("convert", classifyError(err), errorMsg);
|
|
11670
|
+
return {
|
|
11671
|
+
success: false,
|
|
11672
|
+
code: classifyError(err),
|
|
11673
|
+
error: errorMsg,
|
|
11382
11674
|
stage: "convert"
|
|
11383
11675
|
};
|
|
11676
|
+
} finally {
|
|
11677
|
+
releaseLock();
|
|
11384
11678
|
}
|
|
11679
|
+
} catch (unexpectedErr) {
|
|
11680
|
+
const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
|
|
11681
|
+
emitter.error("convert", "PARSE_ERROR", errorMsg);
|
|
11385
11682
|
return {
|
|
11386
11683
|
success: false,
|
|
11387
|
-
code:
|
|
11388
|
-
error:
|
|
11684
|
+
code: "PARSE_ERROR",
|
|
11685
|
+
error: errorMsg,
|
|
11389
11686
|
stage: "convert"
|
|
11390
11687
|
};
|
|
11391
|
-
} finally {
|
|
11392
|
-
releaseLock();
|
|
11393
11688
|
}
|
|
11394
11689
|
}
|
|
11395
11690
|
async function convertHwpToPdf(input, options) {
|
|
@@ -11417,9 +11712,6 @@ async function convertHwpxToPdf(input, options) {
|
|
|
11417
11712
|
return result;
|
|
11418
11713
|
}
|
|
11419
11714
|
|
|
11420
|
-
// src/index.ts
|
|
11421
|
-
init_utils();
|
|
11422
|
-
|
|
11423
11715
|
// src/ocr/api-key-rotation.ts
|
|
11424
11716
|
var AllKeysCoolingDownError = class extends Error {
|
|
11425
11717
|
waitMs;
|
|
@@ -11514,9 +11806,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11514
11806
|
};
|
|
11515
11807
|
|
|
11516
11808
|
// src/pipeline/unified-ocr.ts
|
|
11517
|
-
import { mkdir, readdir, readFile as readFile2, stat, writeFile } from "fs/promises";
|
|
11518
|
-
import { basename as basename2, dirname as dirname3, extname, join as
|
|
11519
|
-
import { spawn as
|
|
11809
|
+
import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
|
|
11810
|
+
import { basename as basename2, delimiter as delimiter2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
|
|
11811
|
+
import { spawn as spawn3 } from "child_process";
|
|
11520
11812
|
import { performance } from "perf_hooks";
|
|
11521
11813
|
init_logger();
|
|
11522
11814
|
|
|
@@ -11652,13 +11944,13 @@ function elapsedMs(startAt) {
|
|
|
11652
11944
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11653
11945
|
const absInput = resolve3(inputPath);
|
|
11654
11946
|
const stem = basename2(absInput, extname(absInput));
|
|
11655
|
-
const workspaceDir = resolve3(options.workspaceDir ??
|
|
11656
|
-
const imagesDir =
|
|
11657
|
-
const rawDir =
|
|
11658
|
-
const diffDir =
|
|
11659
|
-
const outputPath = resolve3(options.outputPath ??
|
|
11660
|
-
const reportPath =
|
|
11661
|
-
const modelCachePath =
|
|
11947
|
+
const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
|
|
11948
|
+
const imagesDir = join5(workspaceDir, "images");
|
|
11949
|
+
const rawDir = join5(workspaceDir, "ocr", "raw");
|
|
11950
|
+
const diffDir = join5(workspaceDir, "ocr", "diff");
|
|
11951
|
+
const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
|
|
11952
|
+
const reportPath = join5(workspaceDir, "run-report.json");
|
|
11953
|
+
const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
|
|
11662
11954
|
const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
|
|
11663
11955
|
const timeoutMs = options.timeoutMs ?? 6e4;
|
|
11664
11956
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
@@ -11672,9 +11964,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11672
11964
|
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
11673
11965
|
const runId = options.runId ?? generateRunId("ocr");
|
|
11674
11966
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
11675
|
-
await
|
|
11676
|
-
await
|
|
11677
|
-
await
|
|
11967
|
+
await mkdir2(imagesDir, { recursive: true });
|
|
11968
|
+
await mkdir2(rawDir, { recursive: true });
|
|
11969
|
+
await mkdir2(diffDir, { recursive: true });
|
|
11678
11970
|
const timingsMs = {};
|
|
11679
11971
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
11680
11972
|
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
@@ -11691,11 +11983,29 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11691
11983
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11692
11984
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
11693
11985
|
if (extname(absInput).toLowerCase() !== ".pdf") {
|
|
11694
|
-
|
|
11695
|
-
|
|
11986
|
+
const convertEmitter = new ConvertEventEmitter();
|
|
11987
|
+
if (options.onEvent) {
|
|
11988
|
+
convertEmitter.setListener((evt) => {
|
|
11989
|
+
if (evt.type === "install" || evt.type === "validate" || evt.type === "error") {
|
|
11990
|
+
try {
|
|
11991
|
+
;
|
|
11992
|
+
options.onEvent(evt);
|
|
11993
|
+
} catch {
|
|
11994
|
+
}
|
|
11995
|
+
}
|
|
11996
|
+
});
|
|
11997
|
+
}
|
|
11998
|
+
if (options.sofficePath) {
|
|
11999
|
+
const sofficeDir = dirname3(options.sofficePath);
|
|
12000
|
+
process.env.PATH = `${sofficeDir}${delimiter2}${process.env.PATH ?? ""}`;
|
|
12001
|
+
convertEmitter.validate("soffice_found", "\uC9C1\uC811 \uC9C0\uC815\uB41C LibreOffice \uACBD\uB85C \uC0AC\uC6A9", { sofficePath: options.sofficePath });
|
|
12002
|
+
} else {
|
|
12003
|
+
await resolveSoffice(convertEmitter, options.autoInstallLibreOffice ?? false);
|
|
12004
|
+
}
|
|
12005
|
+
workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
|
|
11696
12006
|
const inputBuffer = await readFile2(absInput);
|
|
11697
12007
|
const out = await convertBuffer(inputBuffer, ".pdf");
|
|
11698
|
-
await
|
|
12008
|
+
await writeFile2(workingPdfPath, out);
|
|
11699
12009
|
}
|
|
11700
12010
|
timingsMs.convert = elapsedMs(convertStart);
|
|
11701
12011
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
@@ -11706,10 +12016,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11706
12016
|
if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11707
12017
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11708
12018
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
|
|
11709
|
-
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath,
|
|
12019
|
+
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
|
|
11710
12020
|
const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11711
12021
|
if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
|
|
11712
|
-
const probeImage =
|
|
12022
|
+
const probeImage = join5(imagesDir, firstFiles[0]);
|
|
11713
12023
|
markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
|
|
11714
12024
|
const probeStart = performance.now();
|
|
11715
12025
|
currentStage = "probe";
|
|
@@ -11755,7 +12065,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11755
12065
|
try {
|
|
11756
12066
|
await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
|
|
11757
12067
|
if (totalPages > 1) {
|
|
11758
|
-
for await (const item of renderPdfToPngStream(workingPdfPath,
|
|
12068
|
+
for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
|
|
11759
12069
|
await queue2.enqueue(item);
|
|
11760
12070
|
renderDone++;
|
|
11761
12071
|
markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
|
|
@@ -11805,8 +12115,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11805
12115
|
const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
|
|
11806
12116
|
const rawPagePaths = [];
|
|
11807
12117
|
for (const [pageNum, markdown] of sortedEntries) {
|
|
11808
|
-
const pagePath =
|
|
11809
|
-
await
|
|
12118
|
+
const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
|
|
12119
|
+
await writeFile2(pagePath, markdown, "utf-8");
|
|
11810
12120
|
rawPagePaths.push(pagePath);
|
|
11811
12121
|
}
|
|
11812
12122
|
const mergeStart = performance.now();
|
|
@@ -11814,7 +12124,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11814
12124
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11815
12125
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11816
12126
|
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
11817
|
-
await
|
|
12127
|
+
await writeFile2(outputPath, merged, "utf-8");
|
|
11818
12128
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
11819
12129
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11820
12130
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
@@ -11830,7 +12140,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11830
12140
|
timingsMs,
|
|
11831
12141
|
modelCachePath
|
|
11832
12142
|
};
|
|
11833
|
-
await
|
|
12143
|
+
await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
|
|
11834
12144
|
logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
11835
12145
|
return { outputPath, reportPath, selectedModel };
|
|
11836
12146
|
} catch (err) {
|
|
@@ -11921,7 +12231,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11921
12231
|
]);
|
|
11922
12232
|
const files = await readdir(imagesDir);
|
|
11923
12233
|
const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11924
|
-
const imagePath =
|
|
12234
|
+
const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
|
|
11925
12235
|
yield { pageNumber: page, imagePath };
|
|
11926
12236
|
} catch (err) {
|
|
11927
12237
|
yield {
|
|
@@ -11934,7 +12244,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11934
12244
|
}
|
|
11935
12245
|
async function runCommand(cmd, args) {
|
|
11936
12246
|
await new Promise((resolvePromise, reject) => {
|
|
11937
|
-
const child =
|
|
12247
|
+
const child = spawn3(cmd, args, { stdio: "pipe" });
|
|
11938
12248
|
let stderr = "";
|
|
11939
12249
|
child.stderr.on("data", (d) => {
|
|
11940
12250
|
stderr += String(d);
|
|
@@ -11948,7 +12258,7 @@ async function runCommand(cmd, args) {
|
|
|
11948
12258
|
}
|
|
11949
12259
|
async function runCommandWithStdout(cmd, args) {
|
|
11950
12260
|
return await new Promise((resolvePromise, reject) => {
|
|
11951
|
-
const child =
|
|
12261
|
+
const child = spawn3(cmd, args, { stdio: "pipe" });
|
|
11952
12262
|
let stdout = "";
|
|
11953
12263
|
let stderr = "";
|
|
11954
12264
|
child.stdout.on("data", (d) => {
|
|
@@ -12068,7 +12378,7 @@ async function updateModelCache(path, probes) {
|
|
|
12068
12378
|
}
|
|
12069
12379
|
}
|
|
12070
12380
|
current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
12071
|
-
await
|
|
12381
|
+
await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
|
|
12072
12382
|
}
|
|
12073
12383
|
async function ocrWorkerPool(input) {
|
|
12074
12384
|
const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
|
|
@@ -12264,6 +12574,16 @@ function ensureSupportedInput(path) {
|
|
|
12264
12574
|
}
|
|
12265
12575
|
function normalizePipelineError(err, stage) {
|
|
12266
12576
|
if (err instanceof UnifiedOcrError) return err;
|
|
12577
|
+
if (err instanceof ConvertError) {
|
|
12578
|
+
const codeMap = {
|
|
12579
|
+
SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
|
|
12580
|
+
CONVERT_FAILED: "CONVERT_FAILED",
|
|
12581
|
+
TIMEOUT: "CONVERT_FAILED",
|
|
12582
|
+
UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
|
|
12583
|
+
UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
|
|
12584
|
+
};
|
|
12585
|
+
return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
|
|
12586
|
+
}
|
|
12267
12587
|
const message = err instanceof Error ? err.message : String(err);
|
|
12268
12588
|
const codeByStage = {
|
|
12269
12589
|
convert: "CONVERT_FAILED",
|