@clazic/kordoc 2.6.0 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TND4YFBV.js → chunk-4X5JCZFZ.js} +2 -2
- package/dist/{chunk-TS3F57LY.js → chunk-BZPZXI66.js} +349 -66
- package/dist/chunk-BZPZXI66.js.map +1 -0
- package/dist/cli.js +46 -11
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +394 -100
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +63 -7
- package/dist/index.d.ts +63 -7
- package/dist/index.js +380 -86
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-F66K7PXH.js → utils-56QT5C33.js} +2 -2
- package/dist/{watch-2S5ULHAM.js → watch-HRNMJWSE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-TS3F57LY.js.map +0 -1
- /package/dist/{chunk-TND4YFBV.js.map → chunk-4X5JCZFZ.js.map} +0 -0
- /package/dist/{utils-F66K7PXH.js.map → utils-56QT5C33.js.map} +0 -0
- /package/dist/{watch-2S5ULHAM.js.map → watch-HRNMJWSE.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -134,7 +134,7 @@ var VERSION, KordocError, SAFE_HREF_RE;
|
|
|
134
134
|
var init_utils = __esm({
|
|
135
135
|
"src/utils.ts"() {
|
|
136
136
|
"use strict";
|
|
137
|
-
VERSION = true ? "2.
|
|
137
|
+
VERSION = true ? "2.6.0" : "0.0.0-dev";
|
|
138
138
|
KordocError = class extends Error {
|
|
139
139
|
code;
|
|
140
140
|
stage;
|
|
@@ -11255,6 +11255,204 @@ var ConvertError = class extends Error {
|
|
|
11255
11255
|
}
|
|
11256
11256
|
};
|
|
11257
11257
|
|
|
11258
|
+
// src/convert/installer.ts
|
|
11259
|
+
import { homedir } from "os";
|
|
11260
|
+
import { join as join4, delimiter } from "path";
|
|
11261
|
+
import { mkdir, access, symlink, rm } from "fs/promises";
|
|
11262
|
+
import { createWriteStream } from "fs";
|
|
11263
|
+
import { spawn as spawn2 } from "child_process";
|
|
11264
|
+
// Per-user cache directory that holds the auto-installed LibreOffice.
var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");

// Path of a "version" file inside the cache directory.
var VERSION_FILE = join4(CACHE_DIR, "version");

/**
 * Download descriptors keyed by `process.platform`.
 *
 * - `url`     installer/archive to download
 * - `binPath` location of the soffice executable relative to CACHE_DIR
 * - `sizeMb`  size estimate in MiB, used as the progress denominator
 */
var PACKAGES = {
  darwin: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300,
  },
  linux: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200,
  },
  win32: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350,
  },
};
|
|
11283
|
+
/**
 * Check whether a `soffice` executable is reachable through the system PATH.
 *
 * Lazily initialises the utils module, then runs `soffice --version` through
 * its `runCommand` helper. Any failure (module init, spawn, non-zero exit)
 * is treated as "not found".
 *
 * @returns {Promise<"soffice" | null>} the bare command name when the probe
 *   succeeds, otherwise `null`.
 */
async function findInPath() {
  return Promise.resolve()
    .then(() => (init_utils(), utils_exports))
    .then(({ runCommand: probeCommand }) => probeCommand("soffice", ["--version"]))
    .then(
      () => "soffice",
      () => null,
    );
}
|
|
11292
|
+
/**
 * Look for a previously auto-installed LibreOffice inside CACHE_DIR.
 *
 * Two locations are probed, in order:
 *   1. `<CACHE_DIR>/bin/soffice` — the symlink created by the macOS/Linux
 *      installers.
 *   2. `<CACHE_DIR>/<binPath>` for the current platform — the real
 *      executable. The Windows installer never creates the `bin/soffice`
 *      link, so without this second probe a Windows install was never
 *      rediscovered and LibreOffice was downloaded again on every run. It
 *      also covers a macOS/Linux install whose symlink is missing or dangling.
 *
 * @returns {Promise<string | null>} path of the first accessible candidate,
 *   or `null` when nothing is cached.
 */
async function findInCache() {
  const candidates = [join4(CACHE_DIR, "bin", "soffice")];
  const platformPkg = PACKAGES[process.platform];
  if (platformPkg) {
    candidates.push(join4(CACHE_DIR, platformPkg.binPath));
  }
  for (const candidate of candidates) {
    try {
      await access(candidate);
      return candidate;
    } catch {
      // Not present (or not accessible): fall through to the next candidate.
    }
  }
  return null;
}
|
|
11301
|
+
/**
 * Stream `url` into the file `dest`, reporting progress after every chunk.
 *
 * Compared with a naive read/write loop this implementation:
 *  - rejects non-2xx responses instead of saving an error page as the payload;
 *  - honours write-stream backpressure (waits for "drain");
 *  - surfaces write-stream errors instead of leaving them unhandled;
 *  - resolves only after the file has been fully flushed ("finish"), so the
 *    caller can safely hand `dest` to another process;
 *  - prefers the server's Content-Length over the caller's estimate.
 *
 * On failure a partially written `dest` may remain; the caller removes it.
 *
 * @param {string} url - resource to download.
 * @param {string} dest - destination file path.
 * @param {number} totalBytes - expected size, used when the response carries
 *   no usable Content-Length header.
 * @param {(downloaded: number, total: number) => void} [onProgress]
 * @returns {Promise<void>}
 * @throws {Error} on HTTP error status, missing body, or read/write failure.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    // Release the connection before bailing out; cancel failures are irrelevant here.
    await response.body?.cancel().catch(() => {});
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");

  const headerLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
  const expectedTotal = Number.isFinite(headerLength) && headerLength > 0 ? headerLength : totalBytes;

  const file = createWriteStream(dest);
  // A permanent listener keeps an asynchronous stream error from becoming an
  // unhandled 'error' event; the loop below re-throws it at the next step.
  let streamError = null;
  file.on("error", (err) => {
    streamError = err;
  });

  // Resolve when `event` fires on the write stream, reject if it errors first.
  const settleOn = (event) =>
    new Promise((resolve, reject) => {
      const onEvent = () => {
        file.off("error", onError);
        resolve();
      };
      const onError = (err) => {
        file.off(event, onEvent);
        reject(err);
      };
      file.once(event, onEvent);
      file.once("error", onError);
    });

  const reader = response.body.getReader();
  let downloaded = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (streamError) throw streamError;
      if (!file.write(value)) {
        await settleOn("drain");
      }
      downloaded += value.length;
      onProgress?.(downloaded, expectedTotal);
    }
    if (streamError) throw streamError;
    const flushed = settleOn("finish");
    file.end();
    await flushed;
  } catch (err) {
    file.destroy();
    // Best effort: stop the network transfer; the original error is what matters.
    await reader.cancel().catch(() => {});
    throw err;
  } finally {
    reader.releaseLock();
  }
}
|
|
11320
|
+
/**
 * Download the LibreOffice package described by `pkg` and run the installer
 * that matches `process.platform`.
 *
 * The platform is validated before anything is downloaded, and the download
 * itself runs inside the cleanup scope, so a failed or interrupted download no
 * longer leaves a `download-<timestamp>` file behind in CACHE_DIR.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg
 * @param {(downloaded: number, total: number) => void} [onProgress]
 * @returns {Promise<string>} path of the installed soffice executable/link.
 * @throws {ConvertError} UNSUPPORTED_PLATFORM when no installer exists.
 * @throws {Error} when the download or the platform installer fails.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  let install;
  switch (platform) {
    case "darwin":
      install = installMacOS;
      break;
    case "linux":
      install = installLinux;
      break;
    case "win32":
      install = installWindows;
      break;
    default:
      throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }

  await mkdir(CACHE_DIR, { recursive: true });
  const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
  try {
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await install(pkg, downloadPath);
  } catch (err) {
    // Cleanup is best effort; never let it mask the real failure.
    await rm(downloadPath, { force: true }).catch(() => {});
    throw err;
  }
}
|
|
11339
|
+
/**
 * Install LibreOffice on macOS: mount the downloaded .dmg, copy
 * `LibreOffice.app` into CACHE_DIR, detach the image, delete the download and
 * expose the binary through the `bin/soffice` symlink.
 *
 * Fixes over the straightforward version:
 *  - every spawned process has an 'error' listener, so a missing `hdiutil` /
 *    `cp` rejects the promise instead of crashing the process with an
 *    unhandled 'error' event;
 *  - a leftover `LibreOffice.app` from an earlier attempt is removed first;
 *    otherwise `cp -R` would nest the new copy inside the existing directory;
 *  - child stdio is ignored because the output is never read.
 *
 * @param {{binPath: string}} pkg
 * @param {string} downloadPath - path of the downloaded .dmg.
 * @returns {Promise<string>} path of the `soffice` symlink.
 * @throws {Error} when mounting or copying fails.
 */
async function installMacOS(pkg, downloadPath) {
  // Run a command to completion; reject with `failureMessage` on spawn error or non-zero exit.
  const run = (command, args, failureMessage) =>
    new Promise((resolve, reject) => {
      const child = spawn2(command, args, { stdio: "ignore" });
      child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
      child.once("close", (code) => (code === 0 ? resolve() : reject(new Error(failureMessage))));
    });

  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await run("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath], "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328");
  try {
    const appSource = join4(mountPoint, "LibreOffice.app");
    const appDest = join4(CACHE_DIR, "LibreOffice.app");
    await rm(appDest, { recursive: true, force: true });
    await run("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Detaching is best effort, exactly as before: its outcome never fails the install.
    await run("hdiutil", ["detach", mountPoint], "hdiutil detach").catch(() => {});
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(join4(CACHE_DIR, pkg.binPath));
}
|
|
11361
|
+
/**
 * Install LibreOffice on Linux without root: unpack the official
 * `*_deb.tar.gz`, extract every `.deb` it contains into CACHE_DIR with
 * `dpkg-deb -x`, then expose the binary through the `bin/soffice` symlink.
 *
 * Fixes over the previous version:
 *  - extraction errors are no longer swallowed by an empty `catch`; before, a
 *    failed install still returned a link to a non-existent binary and was
 *    reported as successful;
 *  - the `DEBS` directory is searched one level below the extraction root as
 *    well, because the upstream archive wraps its content in a
 *    `LibreOffice_<version>_Linux_x86-64_deb/` directory
 *    (NOTE(review): layout taken from the upstream install instructions —
 *    confirm against the pinned 24.8.4 archive);
 *  - spawned processes have 'error' listeners, so a missing `tar`/`dpkg-deb`
 *    rejects instead of crashing the process;
 *  - the temporary extraction directory is always removed;
 *  - the installed binary is verified before success is reported.
 *
 * @param {{binPath: string}} pkg
 * @param {string} downloadPath - path of the downloaded tarball.
 * @returns {Promise<string>} path of the `soffice` symlink.
 * @throws {Error} when unpacking, package extraction or verification fails.
 */
async function installLinux(pkg, downloadPath) {
  const { readdir: listDir } = await import("fs/promises");

  // Run a command to completion; reject with `failureMessage` on spawn error or non-zero exit.
  const run = (command, args, failureMessage) =>
    new Promise((resolve, reject) => {
      const child = spawn2(command, args, { stdio: "ignore" });
      child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
      child.once("close", (code) => (code === 0 ? resolve() : reject(new Error(failureMessage))));
    });

  const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
  await mkdir(extractDir, { recursive: true });
  try {
    await run("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");

    // DEBS may sit at the root or inside the archive's single top-level directory.
    const candidates = [join4(extractDir, "DEBS")];
    for (const entry of await listDir(extractDir, { withFileTypes: true })) {
      if (entry.isDirectory()) {
        candidates.push(join4(extractDir, entry.name, "DEBS"));
      }
    }
    let debsDir = null;
    for (const candidate of candidates) {
      try {
        await access(candidate);
        debsDir = candidate;
        break;
      } catch {
        // Candidate does not exist; try the next one.
      }
    }
    if (debsDir === null) {
      throw new Error("DEBS \uB514\uB809\uD130\uB9AC\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
    }

    const debFiles = (await listDir(debsDir)).filter((name) => name.endsWith(".deb"));
    // Sequential on purpose: every package is unpacked into the same tree.
    for (const entry of debFiles) {
      await run("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
    }
  } finally {
    await rm(extractDir, { recursive: true, force: true });
  }
  await rm(downloadPath, { force: true });

  const actualBin = join4(CACHE_DIR, pkg.binPath);
  try {
    await access(actualBin);
  } catch (cause) {
    throw new Error(`soffice \uC2E4\uD589 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${actualBin}`, { cause });
  }
  return await createSymlink(actualBin);
}
|
|
11386
|
+
/**
 * Install LibreOffice on Windows by running an administrative (`/a`) MSI
 * extraction into CACHE_DIR, then delete the downloaded installer.
 *
 * The spawned `msiexec` now has an 'error' listener: if it cannot be started
 * the promise rejects instead of the process dying on an unhandled 'error'
 * event. Child stdio is ignored because the output is never read.
 *
 * @param {{binPath: string}} pkg
 * @param {string} downloadPath - path of the downloaded .msi.
 * @returns {Promise<string>} path of the extracted `soffice.exe` (no symlink
 *   is created on Windows).
 * @throws {Error} when msiexec cannot be started or exits non-zero.
 */
async function installWindows(pkg, downloadPath) {
  const failureMessage = "MSI \uC124\uCE58 \uC2E4\uD328";
  await new Promise((resolve, reject) => {
    const child = spawn2("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`], { stdio: "ignore" });
    child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
    child.once("close", (code) => (code === 0 ? resolve() : reject(new Error(failureMessage))));
  });
  await rm(downloadPath, { force: true });
  return join4(CACHE_DIR, pkg.binPath);
}
|
|
11394
|
+
/**
 * Publish `actualBin` as `<CACHE_DIR>/bin/soffice` and put that directory at
 * the front of `process.env.PATH`.
 *
 * Fixes over the previous version:
 *  - an existing link is replaced, so a stale link that points at an older
 *    install no longer survives (all symlink errors used to be swallowed);
 *  - real symlink failures propagate; only EEXIST is tolerated, which after
 *    the removal can only mean a concurrent installer created the link;
 *  - PATH is prepended once instead of growing on every call, and an unset
 *    PATH no longer produces the literal string "undefined".
 *
 * @param {string} actualBin - absolute path of the real soffice executable.
 * @returns {Promise<string>} path of the symlink.
 * @throws {Error} when the link cannot be created.
 */
async function createSymlink(actualBin) {
  const binDir = join4(CACHE_DIR, "bin");
  await mkdir(binDir, { recursive: true });
  const linkBin = join4(binDir, "soffice");

  // rm() removes the link itself, never the file it points to.
  await rm(linkBin, { force: true });
  try {
    await symlink(actualBin, linkBin);
  } catch (err) {
    if (err?.code !== "EEXIST") throw err;
  }

  const currentPath = process.env.PATH ?? "";
  if (!currentPath.split(delimiter).includes(binDir)) {
    process.env.PATH = currentPath === "" ? binDir : `${binDir}${delimiter}${currentPath}`;
  }
  return linkBin;
}
|
|
11405
|
+
/**
 * Entry point of the auto-installer: pick the package descriptor for the
 * current platform and delegate to `installForPlatform`.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress]
 * @returns {Promise<string>} path of the installed soffice executable/link.
 * @throws {ConvertError} UNSUPPORTED_PLATFORM when PACKAGES has no entry for
 *   `process.platform`.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const descriptor = PACKAGES[platform];
  if (descriptor) {
    return await installForPlatform(descriptor, onProgress);
  }
  const reason = `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`;
  throw new ConvertError("UNSUPPORTED_PLATFORM", reason);
}
|
|
11416
|
+
/**
 * Locate a usable soffice executable, installing LibreOffice when allowed.
 *
 * Resolution order: system PATH → kordoc cache → automatic installation.
 * Emits `validate` events while searching and `install` events while
 * downloading/installing.
 *
 * The download percentage is clamped to 0–100: the denominator may be a rough
 * size estimate, so the raw ratio could exceed 100 % (or be NaN/Infinity for
 * a zero total).
 *
 * @param {ConvertEventEmitter} emitter - receives progress/diagnostic events.
 * @param {boolean} [autoInstall=true] - install LibreOffice when not found.
 * @returns {Promise<string>} command name or path of soffice.
 * @throws {ConvertError} SOFFICE_NOT_FOUND when nothing is found and
 *   `autoInstall` is false.
 * @throws {Error} whatever the installer throws (after emitting `install_failed`).
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");

  const inPath = await findInPath();
  if (inPath) {
    emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
    return inPath;
  }

  const inCache = await findInCache();
  if (inCache) {
    emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
    return inCache;
  }

  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }

  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  try {
    const installed = await installLibreOffice((downloaded, total) => {
      const rawPercent = total > 0 ? Math.round(downloaded / total * 100) : 0;
      const percent = Math.min(100, Math.max(0, rawPercent));
      emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
        percent,
        downloadedBytes: downloaded,
        totalBytes: total
      });
    });
    emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
    return installed;
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
    throw err;
  }
}
|
|
11455
|
+
|
|
11258
11456
|
// src/convert/libreoffice.ts
|
|
11259
11457
|
var libreConvert = libre.convert;
|
|
11260
11458
|
async function assertSofficeAvailable() {
|
|
@@ -11291,6 +11489,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
|
|
|
11291
11489
|
});
|
|
11292
11490
|
}
|
|
11293
11491
|
|
|
11492
|
+
// src/convert/events.ts
|
|
11493
|
+
/**
 * Minimal single-listener event channel used by the conversion pipeline.
 *
 * Exactly one listener can be registered at a time; registering another one
 * replaces the previous one. Exceptions thrown by the listener are swallowed
 * so that a faulty observer can never break a conversion.
 */
var ConvertEventEmitter = class {
  listener = null;

  /** Register (or replace) the event listener. */
  setListener(listener) {
    this.listener = listener;
  }

  /** Deliver `event` to the listener, if any; listener errors are ignored. */
  emit(event) {
    if (this.listener === null || this.listener === undefined) {
      return;
    }
    try {
      this.listener(event);
    } catch {
      // Observer failures are intentionally not propagated.
    }
  }

  /** Helper: `detect` event. Fields of `meta` are merged into the event. */
  detect(stage, message, meta) {
    const event = { type: "detect", stage, message, ...meta };
    this.emit(event);
  }

  /** Helper: `validate` event. Fields of `meta` are merged into the event. */
  validate(stage, message, meta) {
    const event = { type: "validate", stage, message, ...meta };
    this.emit(event);
  }

  /** Helper: `install` event. Fields of `meta` are merged into the event. */
  install(stage, message, meta) {
    const event = { type: "install", stage, message, ...meta };
    this.emit(event);
  }

  /** Helper: conversion progress event carrying `percent`. */
  progress(percent, message) {
    const event = { type: "convert", stage: "convert_progress", message, percent };
    this.emit(event);
  }

  /** Helper: conversion started (percent 0). */
  convertStart(message) {
    const event = { type: "convert", stage: "convert_start", message, percent: 0 };
    this.emit(event);
  }

  /** Helper: conversion finished (percent 100). */
  convertDone(message) {
    const event = { type: "convert", stage: "convert_done", message, percent: 100 };
    this.emit(event);
  }

  /** Helper: final success event carrying `result`. */
  complete(result) {
    const event = { type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result };
    this.emit(event);
  }

  /** Helper: error event; always flagged `recoverable: true`. */
  error(stage, code, message, suggestion) {
    const event = { type: "error", stage, code, message, recoverable: true, suggestion };
    this.emit(event);
  }
};
|
|
11539
|
+
|
|
11294
11540
|
// src/convert/index.ts
|
|
11295
11541
|
var isConverting = false;
|
|
11296
11542
|
var queue = [];
|
|
@@ -11315,81 +11561,129 @@ async function acquireConvertLock() {
|
|
|
11315
11561
|
});
|
|
11316
11562
|
}
|
|
11317
11563
|
/**
 * Convert an HWP/HWPX document to PDF through LibreOffice.
 *
 * Pipeline: read input → size check → format detection → locate (or install)
 * LibreOffice → serialised conversion. Failures are reported twice: as an
 * `error` event on the listener(s) and as a `{ success: false, ... }` result;
 * the function itself does not throw.
 *
 * Fixes over the previous version:
 *  - `options.onEvent` and the legacy `options.onProgress` can be used
 *    together. The emitter holds a single listener, so registering the legacy
 *    adapter used to replace `onEvent` silently;
 *  - the `soffice_check` validate event is emitted once (by `resolveSoffice`)
 *    instead of twice;
 *  - the unused `sofficePath` local is gone.
 *    NOTE(review): the resolved path is not forwarded to `convertBuffer`, whose
 *    signature is outside this view — confirm a cached install that is not on
 *    PATH is actually picked up.
 *
 * @param {string | Buffer | ArrayBuffer | Uint8Array} input - file path or
 *   document bytes.
 * @param {object} [options]
 * @param {(event: object) => void} [options.onEvent] - receives every event.
 * @param {(percent: number, message: string) => void} [options.onProgress] -
 *   legacy callback; receives only `convert_progress` events.
 * @param {boolean} [options.autoInstallLibreOffice=true]
 * @param {number} [options.timeoutMs] - forwarded to `convertBuffer`.
 * @returns {Promise<object>} `{ success: true, pdf, sourceFormat }` or
 *   `{ success: false, code, error, stage }`.
 */
async function convertToPdf(input, options) {
  const emitter = new ConvertEventEmitter();

  const listeners = [];
  if (options?.onEvent) {
    listeners.push(options.onEvent);
  }
  if (options?.onProgress) {
    const legacyProgress = options.onProgress;
    listeners.push((event) => {
      if (event.type === "convert" && event.stage === "convert_progress") {
        legacyProgress(event.percent, event.message);
      }
    });
  }
  if (listeners.length > 0) {
    emitter.setListener((event) => {
      for (const notify of listeners) {
        try {
          notify(event);
        } catch {
          // Same best-effort contract as ConvertEventEmitter.emit: one faulty
          // observer must not starve the others or break the conversion.
        }
      }
    });
  }

  try {
    emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
    let buffer;
    try {
      if (typeof input === "string") {
        buffer = await readFile(input);
      } else if (Buffer.isBuffer(input)) {
        buffer = input;
      } else {
        buffer = Buffer.from(input);
      }
    } catch (err) {
      const readError = `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`;
      emitter.error("detect", "PARSE_ERROR", readError);
      return {
        success: false,
        code: "PARSE_ERROR",
        error: readError,
        stage: "detect"
      };
    }

    const MAX_FILE_SIZE = 500 * 1024 * 1024;
    if (buffer.length > MAX_FILE_SIZE) {
      const sizeError = `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`;
      emitter.error("detect", "FILE_TOO_LARGE", sizeError);
      return {
        success: false,
        code: "FILE_TOO_LARGE",
        error: sizeError,
        stage: "detect"
      };
    }

    const format = detectFormat(toArrayBuffer(buffer));
    emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
    if (format !== "hwp" && format !== "hwpx") {
      const formatError = `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`;
      emitter.error("detect", "UNSUPPORTED_FORMAT", formatError);
      return {
        success: false,
        code: "UNSUPPORTED_FORMAT",
        error: formatError,
        stage: "detect"
      };
    }

    // resolveSoffice emits the "soffice_check" event itself.
    try {
      await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
    } catch (err) {
      if (err instanceof ConvertError) {
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "validate"
        };
      }
      // Anything else is reported by the outer handler below.
      throw err;
    }

    // Conversions are serialised; the lock is released in `finally`.
    const releaseLock = await acquireConvertLock();
    try {
      emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
      emitter.progress(10, "\uBCC0\uD658 \uC911...");
      const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
      emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
      emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
      const result = {
        success: true,
        pdf: new Uint8Array(pdf),
        sourceFormat: format
      };
      emitter.complete({
        sourceFormat: format,
        pdfSize: pdf.length
      });
      return result;
    } catch (err) {
      if (err instanceof ConvertError) {
        emitter.error("convert", err.code, err.message);
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "convert"
        };
      }
      const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
      const errorCode = classifyError(err);
      emitter.error("convert", errorCode, errorMsg);
      return {
        success: false,
        code: errorCode,
        error: errorMsg,
        stage: "convert"
      };
    } finally {
      releaseLock();
    }
  } catch (unexpectedErr) {
    const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
    emitter.error("convert", "PARSE_ERROR", errorMsg);
    return {
      success: false,
      code: "PARSE_ERROR",
      error: errorMsg,
      stage: "convert"
    };
  }
}
|
|
11395
11689
|
async function convertHwpToPdf(input, options) {
|
|
@@ -11514,9 +11808,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11514
11808
|
};
|
|
11515
11809
|
|
|
11516
11810
|
// src/pipeline/unified-ocr.ts
|
|
11517
|
-
import { mkdir, readdir, readFile as readFile2, stat, writeFile } from "fs/promises";
|
|
11518
|
-
import { basename as basename2, dirname as dirname3, extname, join as
|
|
11519
|
-
import { spawn as
|
|
11811
|
+
import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
|
|
11812
|
+
import { basename as basename2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
|
|
11813
|
+
import { spawn as spawn3 } from "child_process";
|
|
11520
11814
|
import { performance } from "perf_hooks";
|
|
11521
11815
|
init_logger();
|
|
11522
11816
|
|
|
@@ -11652,13 +11946,13 @@ function elapsedMs(startAt) {
|
|
|
11652
11946
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11653
11947
|
const absInput = resolve3(inputPath);
|
|
11654
11948
|
const stem = basename2(absInput, extname(absInput));
|
|
11655
|
-
const workspaceDir = resolve3(options.workspaceDir ??
|
|
11656
|
-
const imagesDir =
|
|
11657
|
-
const rawDir =
|
|
11658
|
-
const diffDir =
|
|
11659
|
-
const outputPath = resolve3(options.outputPath ??
|
|
11660
|
-
const reportPath =
|
|
11661
|
-
const modelCachePath =
|
|
11949
|
+
const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
|
|
11950
|
+
const imagesDir = join5(workspaceDir, "images");
|
|
11951
|
+
const rawDir = join5(workspaceDir, "ocr", "raw");
|
|
11952
|
+
const diffDir = join5(workspaceDir, "ocr", "diff");
|
|
11953
|
+
const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
|
|
11954
|
+
const reportPath = join5(workspaceDir, "run-report.json");
|
|
11955
|
+
const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
|
|
11662
11956
|
const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
|
|
11663
11957
|
const timeoutMs = options.timeoutMs ?? 6e4;
|
|
11664
11958
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
@@ -11672,9 +11966,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11672
11966
|
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
11673
11967
|
const runId = options.runId ?? generateRunId("ocr");
|
|
11674
11968
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
11675
|
-
await
|
|
11676
|
-
await
|
|
11677
|
-
await
|
|
11969
|
+
await mkdir2(imagesDir, { recursive: true });
|
|
11970
|
+
await mkdir2(rawDir, { recursive: true });
|
|
11971
|
+
await mkdir2(diffDir, { recursive: true });
|
|
11678
11972
|
const timingsMs = {};
|
|
11679
11973
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
11680
11974
|
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
@@ -11692,10 +11986,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11692
11986
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
11693
11987
|
if (extname(absInput).toLowerCase() !== ".pdf") {
|
|
11694
11988
|
await assertSofficeAvailable();
|
|
11695
|
-
workingPdfPath =
|
|
11989
|
+
workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
|
|
11696
11990
|
const inputBuffer = await readFile2(absInput);
|
|
11697
11991
|
const out = await convertBuffer(inputBuffer, ".pdf");
|
|
11698
|
-
await
|
|
11992
|
+
await writeFile2(workingPdfPath, out);
|
|
11699
11993
|
}
|
|
11700
11994
|
timingsMs.convert = elapsedMs(convertStart);
|
|
11701
11995
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
@@ -11706,10 +12000,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11706
12000
|
if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11707
12001
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11708
12002
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
|
|
11709
|
-
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath,
|
|
12003
|
+
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
|
|
11710
12004
|
const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11711
12005
|
if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
|
|
11712
|
-
const probeImage =
|
|
12006
|
+
const probeImage = join5(imagesDir, firstFiles[0]);
|
|
11713
12007
|
markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
|
|
11714
12008
|
const probeStart = performance.now();
|
|
11715
12009
|
currentStage = "probe";
|
|
@@ -11755,7 +12049,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11755
12049
|
try {
|
|
11756
12050
|
await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
|
|
11757
12051
|
if (totalPages > 1) {
|
|
11758
|
-
for await (const item of renderPdfToPngStream(workingPdfPath,
|
|
12052
|
+
for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
|
|
11759
12053
|
await queue2.enqueue(item);
|
|
11760
12054
|
renderDone++;
|
|
11761
12055
|
markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
|
|
@@ -11805,8 +12099,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11805
12099
|
const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
|
|
11806
12100
|
const rawPagePaths = [];
|
|
11807
12101
|
for (const [pageNum, markdown] of sortedEntries) {
|
|
11808
|
-
const pagePath =
|
|
11809
|
-
await
|
|
12102
|
+
const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
|
|
12103
|
+
await writeFile2(pagePath, markdown, "utf-8");
|
|
11810
12104
|
rawPagePaths.push(pagePath);
|
|
11811
12105
|
}
|
|
11812
12106
|
const mergeStart = performance.now();
|
|
@@ -11814,7 +12108,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11814
12108
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11815
12109
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11816
12110
|
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
11817
|
-
await
|
|
12111
|
+
await writeFile2(outputPath, merged, "utf-8");
|
|
11818
12112
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
11819
12113
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11820
12114
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
@@ -11830,7 +12124,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11830
12124
|
timingsMs,
|
|
11831
12125
|
modelCachePath
|
|
11832
12126
|
};
|
|
11833
|
-
await
|
|
12127
|
+
await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
|
|
11834
12128
|
logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
11835
12129
|
return { outputPath, reportPath, selectedModel };
|
|
11836
12130
|
} catch (err) {
|
|
@@ -11921,7 +12215,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11921
12215
|
]);
|
|
11922
12216
|
const files = await readdir(imagesDir);
|
|
11923
12217
|
const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
11924
|
-
const imagePath =
|
|
12218
|
+
const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
|
|
11925
12219
|
yield { pageNumber: page, imagePath };
|
|
11926
12220
|
} catch (err) {
|
|
11927
12221
|
yield {
|
|
@@ -11934,7 +12228,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
11934
12228
|
}
|
|
11935
12229
|
async function runCommand(cmd, args) {
|
|
11936
12230
|
await new Promise((resolvePromise, reject) => {
|
|
11937
|
-
const child =
|
|
12231
|
+
const child = spawn3(cmd, args, { stdio: "pipe" });
|
|
11938
12232
|
let stderr = "";
|
|
11939
12233
|
child.stderr.on("data", (d) => {
|
|
11940
12234
|
stderr += String(d);
|
|
@@ -11948,7 +12242,7 @@ async function runCommand(cmd, args) {
|
|
|
11948
12242
|
}
|
|
11949
12243
|
async function runCommandWithStdout(cmd, args) {
|
|
11950
12244
|
return await new Promise((resolvePromise, reject) => {
|
|
11951
|
-
const child =
|
|
12245
|
+
const child = spawn3(cmd, args, { stdio: "pipe" });
|
|
11952
12246
|
let stdout = "";
|
|
11953
12247
|
let stderr = "";
|
|
11954
12248
|
child.stdout.on("data", (d) => {
|
|
@@ -12068,7 +12362,7 @@ async function updateModelCache(path, probes) {
|
|
|
12068
12362
|
}
|
|
12069
12363
|
}
|
|
12070
12364
|
current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
12071
|
-
await
|
|
12365
|
+
await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
|
|
12072
12366
|
}
|
|
12073
12367
|
async function ocrWorkerPool(input) {
|
|
12074
12368
|
const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
|