@clazic/kordoc 2.6.1 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-BZPZXI66.js → chunk-FZJLIDFL.js} +74 -26
- package/dist/chunk-FZJLIDFL.js.map +1 -0
- package/dist/{chunk-4X5JCZFZ.js → chunk-YIJCHZLO.js} +2 -2
- package/dist/cli.js +6 -6
- package/dist/index.cjs +191 -161
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +8 -2
- package/dist/index.d.ts +8 -2
- package/dist/index.js +192 -162
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-56QT5C33.js → utils-MAETCW66.js} +2 -2
- package/dist/{watch-HRNMJWSE.js → watch-6HVRALTX.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-BZPZXI66.js.map +0 -1
- /package/dist/{chunk-4X5JCZFZ.js.map → chunk-YIJCHZLO.js.map} +0 -0
- /package/dist/{utils-56QT5C33.js.map → utils-MAETCW66.js.map} +0 -0
- /package/dist/{watch-HRNMJWSE.js.map → watch-6HVRALTX.js.map} +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.
|
|
4
|
+
var VERSION = true ? "2.7.1" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-
|
|
108
|
+
//# sourceMappingURL=chunk-YIJCHZLO.js.map
|
package/dist/cli.js
CHANGED
|
@@ -5,11 +5,11 @@ import {
|
|
|
5
5
|
markdownToHwpx,
|
|
6
6
|
markdownToXlsx,
|
|
7
7
|
parse
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-FZJLIDFL.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-YIJCHZLO.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-S7BHLD2V.js";
|
|
15
15
|
import "./chunk-YW5G6BCJ.js";
|
|
@@ -174,7 +174,7 @@ async function runParse(files, opts) {
|
|
|
174
174
|
saveImages(absPath);
|
|
175
175
|
}
|
|
176
176
|
} catch (err) {
|
|
177
|
-
const { sanitizeError } = await import("./utils-
|
|
177
|
+
const { sanitizeError } = await import("./utils-MAETCW66.js");
|
|
178
178
|
process.stderr.write(`
|
|
179
179
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
180
180
|
`);
|
|
@@ -256,7 +256,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
256
256
|
`));
|
|
257
257
|
}
|
|
258
258
|
} catch (err) {
|
|
259
|
-
const { sanitizeError } = await import("./utils-
|
|
259
|
+
const { sanitizeError } = await import("./utils-MAETCW66.js");
|
|
260
260
|
process.stderr.write(` FAIL
|
|
261
261
|
`);
|
|
262
262
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -337,7 +337,7 @@ program.command("convert-pdf <input>").description("HWP/HWPX \uD30C\uC77C\uC744
|
|
|
337
337
|
`);
|
|
338
338
|
}
|
|
339
339
|
} catch (err) {
|
|
340
|
-
const { sanitizeError } = await import("./utils-
|
|
340
|
+
const { sanitizeError } = await import("./utils-MAETCW66.js");
|
|
341
341
|
process.stderr.write(` FAIL
|
|
342
342
|
`);
|
|
343
343
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -369,7 +369,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
369
369
|
}
|
|
370
370
|
});
|
|
371
371
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
372
|
-
const { watchDirectory } = await import("./watch-
|
|
372
|
+
const { watchDirectory } = await import("./watch-6HVRALTX.js");
|
|
373
373
|
await watchDirectory({
|
|
374
374
|
dir,
|
|
375
375
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -33,118 +33,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
33
33
|
));
|
|
34
34
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
35
35
|
|
|
36
|
-
// src/utils.ts
|
|
37
|
-
var utils_exports = {};
|
|
38
|
-
__export(utils_exports, {
|
|
39
|
-
KordocError: () => KordocError,
|
|
40
|
-
VERSION: () => VERSION,
|
|
41
|
-
classifyError: () => classifyError,
|
|
42
|
-
isPathTraversal: () => isPathTraversal,
|
|
43
|
-
normalizeKordocError: () => normalizeKordocError,
|
|
44
|
-
precheckZipSize: () => precheckZipSize,
|
|
45
|
-
sanitizeError: () => sanitizeError,
|
|
46
|
-
sanitizeHref: () => sanitizeHref,
|
|
47
|
-
toArrayBuffer: () => toArrayBuffer
|
|
48
|
-
});
|
|
49
|
-
function toArrayBuffer(buf) {
|
|
50
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
51
|
-
return buf.buffer;
|
|
52
|
-
}
|
|
53
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
54
|
-
}
|
|
55
|
-
function sanitizeError(err) {
|
|
56
|
-
if (err instanceof KordocError) return err.message;
|
|
57
|
-
return "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
|
|
58
|
-
}
|
|
59
|
-
function isPathTraversal(name) {
|
|
60
|
-
if (name.includes("\0")) return true;
|
|
61
|
-
const normalized = name.replace(/\\/g, "/");
|
|
62
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
63
|
-
}
|
|
64
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
65
|
-
try {
|
|
66
|
-
const data = new DataView(buffer);
|
|
67
|
-
const len = buffer.byteLength;
|
|
68
|
-
let eocdOffset = -1;
|
|
69
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
70
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
71
|
-
eocdOffset = i;
|
|
72
|
-
break;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
76
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
77
|
-
if (entryCount > maxEntries) {
|
|
78
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
79
|
-
}
|
|
80
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
81
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
82
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
83
|
-
let totalUncompressed = 0;
|
|
84
|
-
let pos = cdOffset;
|
|
85
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
86
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
87
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
88
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
89
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
90
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
91
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
92
|
-
}
|
|
93
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
94
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
95
|
-
}
|
|
96
|
-
return { totalUncompressed, entryCount };
|
|
97
|
-
} catch (err) {
|
|
98
|
-
if (err instanceof KordocError) throw err;
|
|
99
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
function sanitizeHref(href) {
|
|
103
|
-
const trimmed = href.trim();
|
|
104
|
-
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
105
|
-
return trimmed;
|
|
106
|
-
}
|
|
107
|
-
function classifyError(err) {
|
|
108
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
109
|
-
const msg = err.message;
|
|
110
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
111
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
112
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
113
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
114
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
115
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
116
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
117
|
-
return "PARSE_ERROR";
|
|
118
|
-
}
|
|
119
|
-
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
|
|
120
|
-
if (err instanceof KordocError) {
|
|
121
|
-
if (!err.stage) err.stage = stage;
|
|
122
|
-
if (!err.code) err.code = fallbackCode;
|
|
123
|
-
return err;
|
|
124
|
-
}
|
|
125
|
-
const message = err instanceof Error ? err.message : fallbackMessage;
|
|
126
|
-
const code = err instanceof Error ? classifyError(err) : fallbackCode;
|
|
127
|
-
return new KordocError(message || fallbackMessage, { code, stage });
|
|
128
|
-
}
|
|
129
|
-
var VERSION, KordocError, SAFE_HREF_RE;
|
|
130
|
-
var init_utils = __esm({
|
|
131
|
-
"src/utils.ts"() {
|
|
132
|
-
"use strict";
|
|
133
|
-
VERSION = true ? "2.6.0" : "0.0.0-dev";
|
|
134
|
-
KordocError = class extends Error {
|
|
135
|
-
code;
|
|
136
|
-
stage;
|
|
137
|
-
constructor(message, opts = {}) {
|
|
138
|
-
super(message);
|
|
139
|
-
this.name = "KordocError";
|
|
140
|
-
this.code = opts.code;
|
|
141
|
-
this.stage = opts.stage;
|
|
142
|
-
}
|
|
143
|
-
};
|
|
144
|
-
SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
145
|
-
}
|
|
146
|
-
});
|
|
147
|
-
|
|
148
36
|
// src/page-range.ts
|
|
149
37
|
var page_range_exports = {};
|
|
150
38
|
__export(page_range_exports, {
|
|
@@ -3248,8 +3136,97 @@ async function detectZipFormat(buffer) {
|
|
|
3248
3136
|
var import_jszip2 = __toESM(require("jszip"), 1);
|
|
3249
3137
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3250
3138
|
|
|
3139
|
+
// src/utils.ts
|
|
3140
|
+
var VERSION = true ? "2.7.1" : "0.0.0-dev";
|
|
3141
|
+
function toArrayBuffer(buf) {
|
|
3142
|
+
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3143
|
+
return buf.buffer;
|
|
3144
|
+
}
|
|
3145
|
+
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
3146
|
+
}
|
|
3147
|
+
var KordocError = class extends Error {
|
|
3148
|
+
code;
|
|
3149
|
+
stage;
|
|
3150
|
+
constructor(message, opts = {}) {
|
|
3151
|
+
super(message);
|
|
3152
|
+
this.name = "KordocError";
|
|
3153
|
+
this.code = opts.code;
|
|
3154
|
+
this.stage = opts.stage;
|
|
3155
|
+
}
|
|
3156
|
+
};
|
|
3157
|
+
function isPathTraversal(name) {
|
|
3158
|
+
if (name.includes("\0")) return true;
|
|
3159
|
+
const normalized = name.replace(/\\/g, "/");
|
|
3160
|
+
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
3161
|
+
}
|
|
3162
|
+
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
3163
|
+
try {
|
|
3164
|
+
const data = new DataView(buffer);
|
|
3165
|
+
const len = buffer.byteLength;
|
|
3166
|
+
let eocdOffset = -1;
|
|
3167
|
+
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
3168
|
+
if (data.getUint32(i, true) === 101010256) {
|
|
3169
|
+
eocdOffset = i;
|
|
3170
|
+
break;
|
|
3171
|
+
}
|
|
3172
|
+
}
|
|
3173
|
+
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
3174
|
+
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
3175
|
+
if (entryCount > maxEntries) {
|
|
3176
|
+
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
3177
|
+
}
|
|
3178
|
+
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
3179
|
+
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
3180
|
+
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
3181
|
+
let totalUncompressed = 0;
|
|
3182
|
+
let pos = cdOffset;
|
|
3183
|
+
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
3184
|
+
if (data.getUint32(pos, true) !== 33639248) break;
|
|
3185
|
+
totalUncompressed += data.getUint32(pos + 24, true);
|
|
3186
|
+
const nameLen = data.getUint16(pos + 28, true);
|
|
3187
|
+
const extraLen = data.getUint16(pos + 30, true);
|
|
3188
|
+
const commentLen = data.getUint16(pos + 32, true);
|
|
3189
|
+
pos += 46 + nameLen + extraLen + commentLen;
|
|
3190
|
+
}
|
|
3191
|
+
if (totalUncompressed > maxUncompressedSize) {
|
|
3192
|
+
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
3193
|
+
}
|
|
3194
|
+
return { totalUncompressed, entryCount };
|
|
3195
|
+
} catch (err) {
|
|
3196
|
+
if (err instanceof KordocError) throw err;
|
|
3197
|
+
return { totalUncompressed: 0, entryCount: 0 };
|
|
3198
|
+
}
|
|
3199
|
+
}
|
|
3200
|
+
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
3201
|
+
function sanitizeHref(href) {
|
|
3202
|
+
const trimmed = href.trim();
|
|
3203
|
+
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
3204
|
+
return trimmed;
|
|
3205
|
+
}
|
|
3206
|
+
function classifyError(err) {
|
|
3207
|
+
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
3208
|
+
const msg = err.message;
|
|
3209
|
+
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
3210
|
+
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
3211
|
+
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
3212
|
+
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
3213
|
+
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
3214
|
+
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
3215
|
+
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
3216
|
+
return "PARSE_ERROR";
|
|
3217
|
+
}
|
|
3218
|
+
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
|
|
3219
|
+
if (err instanceof KordocError) {
|
|
3220
|
+
if (!err.stage) err.stage = stage;
|
|
3221
|
+
if (!err.code) err.code = fallbackCode;
|
|
3222
|
+
return err;
|
|
3223
|
+
}
|
|
3224
|
+
const message = err instanceof Error ? err.message : fallbackMessage;
|
|
3225
|
+
const code = err instanceof Error ? classifyError(err) : fallbackCode;
|
|
3226
|
+
return new KordocError(message || fallbackMessage, { code, stage });
|
|
3227
|
+
}
|
|
3228
|
+
|
|
3251
3229
|
// src/table/builder.ts
|
|
3252
|
-
init_utils();
|
|
3253
3230
|
var MAX_COLS = 200;
|
|
3254
3231
|
var MAX_ROWS = 1e4;
|
|
3255
3232
|
function buildTable(rows) {
|
|
@@ -3509,8 +3486,6 @@ var HEADING_RATIO_H2 = 1.3;
|
|
|
3509
3486
|
var HEADING_RATIO_H3 = 1.15;
|
|
3510
3487
|
|
|
3511
3488
|
// src/hwpx/parser.ts
|
|
3512
|
-
init_utils();
|
|
3513
|
-
init_utils();
|
|
3514
3489
|
init_page_range();
|
|
3515
3490
|
init_logger();
|
|
3516
3491
|
var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
|
|
@@ -4353,7 +4328,6 @@ function extractTextFromNode(node) {
|
|
|
4353
4328
|
|
|
4354
4329
|
// src/hwp5/record.ts
|
|
4355
4330
|
var import_zlib = require("zlib");
|
|
4356
|
-
init_utils();
|
|
4357
4331
|
var TAG_PARA_HEADER = 66;
|
|
4358
4332
|
var TAG_PARA_TEXT = 67;
|
|
4359
4333
|
var TAG_CHAR_SHAPE = 68;
|
|
@@ -5403,7 +5377,6 @@ function parseLenientCfb(data) {
|
|
|
5403
5377
|
}
|
|
5404
5378
|
|
|
5405
5379
|
// src/hwp5/parser.ts
|
|
5406
|
-
init_utils();
|
|
5407
5380
|
init_page_range();
|
|
5408
5381
|
init_logger();
|
|
5409
5382
|
var CFB = __toESM(require_cfb(), 1);
|
|
@@ -6059,7 +6032,6 @@ function arrangeCells(rows, cols, cells) {
|
|
|
6059
6032
|
}
|
|
6060
6033
|
|
|
6061
6034
|
// src/pdf/parser.ts
|
|
6062
|
-
init_utils();
|
|
6063
6035
|
init_page_range();
|
|
6064
6036
|
var import_module = require("module");
|
|
6065
6037
|
var import_path4 = require("path");
|
|
@@ -7953,7 +7925,6 @@ function mergeKoreanLines(text) {
|
|
|
7953
7925
|
// src/xlsx/parser.ts
|
|
7954
7926
|
var import_jszip3 = __toESM(require("jszip"), 1);
|
|
7955
7927
|
var import_xmldom2 = require("@xmldom/xmldom");
|
|
7956
|
-
init_utils();
|
|
7957
7928
|
init_logger();
|
|
7958
7929
|
var MAX_SHEETS = 100;
|
|
7959
7930
|
var MAX_DECOMPRESS_SIZE3 = 500 * 1024 * 1024;
|
|
@@ -8282,7 +8253,6 @@ async function parseXlsxDocument(buffer, options, existingZip) {
|
|
|
8282
8253
|
// src/docx/parser.ts
|
|
8283
8254
|
var import_jszip4 = __toESM(require("jszip"), 1);
|
|
8284
8255
|
var import_xmldom3 = require("@xmldom/xmldom");
|
|
8285
|
-
init_utils();
|
|
8286
8256
|
init_logger();
|
|
8287
8257
|
var MAX_DECOMPRESS_SIZE4 = 500 * 1024 * 1024;
|
|
8288
8258
|
function getChildElements(parent, localName) {
|
|
@@ -8762,7 +8732,6 @@ async function parseDocxDocument(buffer, options, existingZip) {
|
|
|
8762
8732
|
}
|
|
8763
8733
|
|
|
8764
8734
|
// src/index.ts
|
|
8765
|
-
init_utils();
|
|
8766
8735
|
init_cli_provider();
|
|
8767
8736
|
init_markdown_to_blocks();
|
|
8768
8737
|
init_logger();
|
|
@@ -11266,7 +11235,6 @@ async function markdownToXlsx(markdown, options) {
|
|
|
11266
11235
|
|
|
11267
11236
|
// src/convert/index.ts
|
|
11268
11237
|
var import_promises3 = require("fs/promises");
|
|
11269
|
-
init_utils();
|
|
11270
11238
|
|
|
11271
11239
|
// src/convert/libreoffice.ts
|
|
11272
11240
|
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
@@ -11286,6 +11254,7 @@ var import_path5 = require("path");
|
|
|
11286
11254
|
var import_promises2 = require("fs/promises");
|
|
11287
11255
|
var import_fs4 = require("fs");
|
|
11288
11256
|
var import_child_process4 = require("child_process");
|
|
11257
|
+
var installInFlight = null;
|
|
11289
11258
|
var CACHE_DIR = (0, import_path5.join)((0, import_os3.homedir)(), ".cache", "kordoc", "libreoffice");
|
|
11290
11259
|
var VERSION_FILE = (0, import_path5.join)(CACHE_DIR, "version");
|
|
11291
11260
|
var PACKAGES = {
|
|
@@ -11306,13 +11275,11 @@ var PACKAGES = {
|
|
|
11306
11275
|
}
|
|
11307
11276
|
};
|
|
11308
11277
|
async function findInPath() {
|
|
11309
|
-
|
|
11310
|
-
const
|
|
11311
|
-
|
|
11312
|
-
|
|
11313
|
-
}
|
|
11314
|
-
return null;
|
|
11315
|
-
}
|
|
11278
|
+
return new Promise((resolve4) => {
|
|
11279
|
+
const child = (0, import_child_process4.spawn)("soffice", ["--version"], { stdio: "ignore" });
|
|
11280
|
+
child.on("close", (code) => resolve4(code === 0 ? "soffice" : null));
|
|
11281
|
+
child.on("error", () => resolve4(null));
|
|
11282
|
+
});
|
|
11316
11283
|
}
|
|
11317
11284
|
async function findInCache() {
|
|
11318
11285
|
const cachedBin = (0, import_path5.join)(CACHE_DIR, "bin", "soffice");
|
|
@@ -11323,6 +11290,38 @@ async function findInCache() {
|
|
|
11323
11290
|
return null;
|
|
11324
11291
|
}
|
|
11325
11292
|
}
|
|
11293
|
+
async function findInDefaultPaths() {
|
|
11294
|
+
const platform = process.platform;
|
|
11295
|
+
const paths = [];
|
|
11296
|
+
if (platform === "darwin") {
|
|
11297
|
+
paths.push(
|
|
11298
|
+
"/Applications/LibreOffice.app/Contents/MacOS/soffice",
|
|
11299
|
+
"/opt/homebrew/bin/soffice",
|
|
11300
|
+
"/usr/local/bin/soffice"
|
|
11301
|
+
);
|
|
11302
|
+
} else if (platform === "linux") {
|
|
11303
|
+
paths.push(
|
|
11304
|
+
"/usr/bin/soffice",
|
|
11305
|
+
"/usr/lib/libreoffice/program/soffice"
|
|
11306
|
+
);
|
|
11307
|
+
} else if (platform === "win32") {
|
|
11308
|
+
const pf = process.env["ProgramFiles"] ?? "C:\\Program Files";
|
|
11309
|
+
const pf86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
|
|
11310
|
+
paths.push(
|
|
11311
|
+
(0, import_path5.join)(pf, "LibreOffice", "program", "soffice.exe"),
|
|
11312
|
+
(0, import_path5.join)(pf86, "LibreOffice", "program", "soffice.exe")
|
|
11313
|
+
);
|
|
11314
|
+
}
|
|
11315
|
+
for (const p of paths) {
|
|
11316
|
+
try {
|
|
11317
|
+
await (0, import_promises2.access)(p);
|
|
11318
|
+
return p;
|
|
11319
|
+
} catch {
|
|
11320
|
+
continue;
|
|
11321
|
+
}
|
|
11322
|
+
}
|
|
11323
|
+
return null;
|
|
11324
|
+
}
|
|
11326
11325
|
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
|
|
11327
11326
|
const response = await fetch(url);
|
|
11328
11327
|
if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
|
|
@@ -11333,13 +11332,17 @@ async function downloadWithProgress(url, dest, totalBytes, onProgress) {
|
|
|
11333
11332
|
while (true) {
|
|
11334
11333
|
const { done, value } = await reader.read();
|
|
11335
11334
|
if (done) break;
|
|
11336
|
-
file.write(value)
|
|
11335
|
+
if (!file.write(value)) {
|
|
11336
|
+
await new Promise((resolve4) => file.once("drain", resolve4));
|
|
11337
|
+
}
|
|
11337
11338
|
downloaded += value.length;
|
|
11338
11339
|
onProgress?.(downloaded, totalBytes);
|
|
11339
11340
|
}
|
|
11340
11341
|
} finally {
|
|
11341
|
-
file.end();
|
|
11342
11342
|
reader.releaseLock();
|
|
11343
|
+
await new Promise((resolve4, reject) => {
|
|
11344
|
+
file.end((err) => err ? reject(err) : resolve4());
|
|
11345
|
+
});
|
|
11343
11346
|
}
|
|
11344
11347
|
}
|
|
11345
11348
|
async function installForPlatform(pkg, onProgress) {
|
|
@@ -11450,6 +11453,11 @@ async function resolveSoffice(emitter, autoInstall = true) {
|
|
|
11450
11453
|
emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
|
|
11451
11454
|
return inCache;
|
|
11452
11455
|
}
|
|
11456
|
+
const inDefault = await findInDefaultPaths();
|
|
11457
|
+
if (inDefault) {
|
|
11458
|
+
emitter.validate("soffice_found", "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inDefault });
|
|
11459
|
+
return inDefault;
|
|
11460
|
+
}
|
|
11453
11461
|
if (!autoInstall) {
|
|
11454
11462
|
emitter.error(
|
|
11455
11463
|
"validate",
|
|
@@ -11459,38 +11467,35 @@ async function resolveSoffice(emitter, autoInstall = true) {
|
|
|
11459
11467
|
);
|
|
11460
11468
|
throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
|
|
11461
11469
|
}
|
|
11470
|
+
if (installInFlight) {
|
|
11471
|
+
return installInFlight;
|
|
11472
|
+
}
|
|
11462
11473
|
emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
|
|
11463
|
-
|
|
11464
|
-
|
|
11465
|
-
const
|
|
11466
|
-
|
|
11467
|
-
percent
|
|
11468
|
-
|
|
11469
|
-
|
|
11474
|
+
installInFlight = (async () => {
|
|
11475
|
+
try {
|
|
11476
|
+
const installed = await installLibreOffice((downloaded, total) => {
|
|
11477
|
+
const percent = Math.round(downloaded / total * 100);
|
|
11478
|
+
emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
|
|
11479
|
+
percent,
|
|
11480
|
+
downloadedBytes: downloaded,
|
|
11481
|
+
totalBytes: total
|
|
11482
|
+
});
|
|
11470
11483
|
});
|
|
11471
|
-
|
|
11472
|
-
|
|
11473
|
-
|
|
11474
|
-
|
|
11475
|
-
|
|
11476
|
-
|
|
11477
|
-
|
|
11478
|
-
|
|
11484
|
+
emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
|
|
11485
|
+
return installed;
|
|
11486
|
+
} catch (err) {
|
|
11487
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
11488
|
+
emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
|
|
11489
|
+
throw err;
|
|
11490
|
+
} finally {
|
|
11491
|
+
installInFlight = null;
|
|
11492
|
+
}
|
|
11493
|
+
})();
|
|
11494
|
+
return installInFlight;
|
|
11479
11495
|
}
|
|
11480
11496
|
|
|
11481
11497
|
// src/convert/libreoffice.ts
|
|
11482
11498
|
var libreConvert = import_libreoffice_convert.default.convert;
|
|
11483
|
-
async function assertSofficeAvailable() {
|
|
11484
|
-
const { runCommand: runCommand2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
|
|
11485
|
-
try {
|
|
11486
|
-
await runCommand2("soffice", ["--version"]);
|
|
11487
|
-
} catch {
|
|
11488
|
-
throw new ConvertError(
|
|
11489
|
-
"SOFFICE_NOT_FOUND",
|
|
11490
|
-
"soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
|
|
11491
|
-
);
|
|
11492
|
-
}
|
|
11493
|
-
}
|
|
11494
11499
|
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
11495
11500
|
return new Promise((resolve4, reject) => {
|
|
11496
11501
|
const timer = setTimeout(() => {
|
|
@@ -11736,9 +11741,6 @@ async function convertHwpxToPdf(input, options) {
|
|
|
11736
11741
|
return result;
|
|
11737
11742
|
}
|
|
11738
11743
|
|
|
11739
|
-
// src/index.ts
|
|
11740
|
-
init_utils();
|
|
11741
|
-
|
|
11742
11744
|
// src/ocr/api-key-rotation.ts
|
|
11743
11745
|
var AllKeysCoolingDownError = class extends Error {
|
|
11744
11746
|
waitMs;
|
|
@@ -12010,7 +12012,25 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12010
12012
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
12011
12013
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
12012
12014
|
if ((0, import_path6.extname)(absInput).toLowerCase() !== ".pdf") {
|
|
12013
|
-
|
|
12015
|
+
const convertEmitter = new ConvertEventEmitter();
|
|
12016
|
+
if (options.onEvent) {
|
|
12017
|
+
convertEmitter.setListener((evt) => {
|
|
12018
|
+
if (evt.type === "install" || evt.type === "validate" || evt.type === "error") {
|
|
12019
|
+
try {
|
|
12020
|
+
;
|
|
12021
|
+
options.onEvent(evt);
|
|
12022
|
+
} catch {
|
|
12023
|
+
}
|
|
12024
|
+
}
|
|
12025
|
+
});
|
|
12026
|
+
}
|
|
12027
|
+
if (options.sofficePath) {
|
|
12028
|
+
const sofficeDir = (0, import_path6.dirname)(options.sofficePath);
|
|
12029
|
+
process.env.PATH = `${sofficeDir}${import_path6.delimiter}${process.env.PATH ?? ""}`;
|
|
12030
|
+
convertEmitter.validate("soffice_found", "\uC9C1\uC811 \uC9C0\uC815\uB41C LibreOffice \uACBD\uB85C \uC0AC\uC6A9", { sofficePath: options.sofficePath });
|
|
12031
|
+
} else {
|
|
12032
|
+
await resolveSoffice(convertEmitter, options.autoInstallLibreOffice ?? false);
|
|
12033
|
+
}
|
|
12014
12034
|
workingPdfPath = (0, import_path6.join)(workspaceDir, `${stem}.pdf`);
|
|
12015
12035
|
const inputBuffer = await (0, import_promises4.readFile)(absInput);
|
|
12016
12036
|
const out = await convertBuffer(inputBuffer, ".pdf");
|
|
@@ -12583,6 +12603,16 @@ function ensureSupportedInput(path) {
|
|
|
12583
12603
|
}
|
|
12584
12604
|
function normalizePipelineError(err, stage) {
|
|
12585
12605
|
if (err instanceof UnifiedOcrError) return err;
|
|
12606
|
+
if (err instanceof ConvertError) {
|
|
12607
|
+
const codeMap = {
|
|
12608
|
+
SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
|
|
12609
|
+
CONVERT_FAILED: "CONVERT_FAILED",
|
|
12610
|
+
TIMEOUT: "CONVERT_FAILED",
|
|
12611
|
+
UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
|
|
12612
|
+
UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
|
|
12613
|
+
};
|
|
12614
|
+
return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
|
|
12615
|
+
}
|
|
12586
12616
|
const message = err instanceof Error ? err.message : String(err);
|
|
12587
12617
|
const codeByStage = {
|
|
12588
12618
|
convert: "CONVERT_FAILED",
|