@clazic/kordoc 2.6.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -130,7 +130,7 @@ var VERSION, KordocError, SAFE_HREF_RE;
130
130
  var init_utils = __esm({
131
131
  "src/utils.ts"() {
132
132
  "use strict";
133
- VERSION = true ? "2.5.2" : "0.0.0-dev";
133
+ VERSION = true ? "2.6.0" : "0.0.0-dev";
134
134
  KordocError = class extends Error {
135
135
  code;
136
136
  stage;
@@ -3196,7 +3196,7 @@ __export(index_exports, {
3196
3196
  runUnifiedOcrPipeline: () => runUnifiedOcrPipeline
3197
3197
  });
3198
3198
  module.exports = __toCommonJS(index_exports);
3199
- var import_promises4 = require("fs/promises");
3199
+ var import_promises5 = require("fs/promises");
3200
3200
 
3201
3201
  // src/detect.ts
3202
3202
  var import_jszip = __toESM(require("jszip"), 1);
@@ -11265,7 +11265,7 @@ async function markdownToXlsx(markdown, options) {
11265
11265
  }
11266
11266
 
11267
11267
  // src/convert/index.ts
11268
- var import_promises2 = require("fs/promises");
11268
+ var import_promises3 = require("fs/promises");
11269
11269
  init_utils();
11270
11270
 
11271
11271
  // src/convert/libreoffice.ts
@@ -11280,6 +11280,204 @@ var ConvertError = class extends Error {
11280
11280
  }
11281
11281
  };
11282
11282
 
11283
+ // src/convert/installer.ts
11284
+ var import_os3 = require("os");
11285
+ var import_path5 = require("path");
11286
+ var import_promises2 = require("fs/promises");
11287
+ var import_fs4 = require("fs");
11288
+ var import_child_process4 = require("child_process");
11289
// Per-user cache directory that receives the automatically installed LibreOffice copy.
var CACHE_DIR = import_path5.join(import_os3.homedir(), ".cache", "kordoc", "libreoffice");

// Path of a version marker file inside the cache directory.
// NOTE(review): nothing in the visible installer code reads or writes this file — confirm it is still needed.
var VERSION_FILE = import_path5.join(CACHE_DIR, "version");

/**
 * Download descriptors keyed by `process.platform`.
 *   url     - installer archive that is fetched
 *   binPath - location of the soffice executable relative to CACHE_DIR once unpacked
 *   sizeMb  - approximate archive size; used as the denominator for download progress
 * NOTE(review): every entry points at an x86_64 build — confirm the behaviour on arm64 hosts.
 */
var PACKAGES = {
  darwin: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300
  },
  linux: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200
  },
  win32: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350
  }
};
11308
/**
 * Probes for a system-wide LibreOffice by running `soffice --version` through
 * the `runCommand` helper exposed by the utils module.
 *
 * @returns {Promise<"soffice"|null>} the bare command name when the probe
 *   completes without throwing, otherwise `null`.
 */
async function findInPath() {
  const loadUtils = () => (init_utils(), utils_exports);
  try {
    const { runCommand: probe } = await Promise.resolve().then(loadUtils);
    await probe("soffice", ["--version"]);
  } catch {
    // Any failure while loading the helper or running the probe means "not available".
    return null;
  }
  return "soffice";
}
11317
/**
 * Looks for a LibreOffice binary left behind by a previous automatic install.
 *
 * Two locations are checked, in order:
 *   1. the `bin/soffice` link created by `createSymlink`;
 *   2. the platform's real binary (`PACKAGES[platform].binPath`) inside the cache.
 * The second check matters because the Windows installer never creates the
 * link and link creation is best-effort elsewhere, so relying on the link
 * alone would trigger a full re-download on every run.
 *
 * @returns {Promise<string|null>} absolute path of the first existing
 *   candidate, or `null` when nothing is cached.
 */
async function findInCache() {
  const candidates = [(0, import_path5.join)(CACHE_DIR, "bin", "soffice")];
  const pkg = Object.hasOwn(PACKAGES, process.platform) ? PACKAGES[process.platform] : void 0;
  if (pkg) {
    candidates.push((0, import_path5.join)(CACHE_DIR, pkg.binPath));
  }
  for (const candidate of candidates) {
    try {
      await (0, import_promises2.access)(candidate);
      return candidate;
    } catch {
      // Candidate is missing or unreadable; fall through to the next one.
    }
  }
  return null;
}
11326
/**
 * Streams `url` into the file `dest`, reporting progress after every chunk.
 *
 * Compared with a naive read/write loop this version:
 *   - rejects on non-2xx responses instead of saving an error page as the installer;
 *   - honours write backpressure so the whole archive is not buffered in memory;
 *   - resolves only after the file has been flushed and closed, so the caller
 *     can hand it to an external tool right away;
 *   - surfaces write-stream errors and removes the partial file on any failure.
 *
 * @param {string} url - resource to download.
 * @param {string} dest - destination file path.
 * @param {number} totalBytes - fallback size estimate, used when the server
 *   sends no usable Content-Length header.
 * @param {(downloaded: number, total: number) => void} [onProgress] - called
 *   after each chunk has been queued for writing.
 * @returns {Promise<void>}
 * @throws {Error} when the request fails, the body is missing or writing fails.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    // Release the connection before bailing out.
    await response.body?.cancel().catch(() => {
    });
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
  // Prefer the real size when the server reports one; the caller only has an estimate.
  const reportedLength = Number(response.headers.get("content-length"));
  const expectedBytes = reportedLength > 0 ? reportedLength : totalBytes;
  const file = (0, import_fs4.createWriteStream)(dest);
  const closed = new Promise((resolve4, reject) => {
    file.once("error", reject);
    file.once("close", resolve4);
  });
  // Keep an early stream error from becoming an unhandled rejection; it is
  // still observed through the awaits below.
  closed.catch(() => {
  });
  const reader = response.body.getReader();
  let downloaded = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!file.write(value)) {
        // Internal buffer is full: wait for it to drain, or fail fast on a stream error.
        await Promise.race([new Promise((resolve4) => file.once("drain", resolve4)), closed]);
      }
      downloaded += value.length;
      onProgress?.(downloaded, expectedBytes);
    }
    file.end();
    await closed;
  } catch (err) {
    file.destroy();
    await reader.cancel().catch(() => {
    });
    await (0, import_promises2.rm)(dest, { force: true }).catch(() => {
    });
    throw err;
  } finally {
    reader.releaseLock();
  }
}
11345
/**
 * Downloads the given package and runs the installer for the current platform.
 *
 * The platform is validated before anything is downloaded, so an unsupported
 * host fails immediately instead of fetching hundreds of megabytes first. The
 * download itself runs inside the guarded section, so a failed or partial
 * download is removed as well.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg - package descriptor.
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} path of the installed soffice binary, as returned
 *   by the platform installer.
 * @throws {ConvertError} `UNSUPPORTED_PLATFORM` when no installer exists for `process.platform`.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  const installers = {
    darwin: installMacOS,
    linux: installLinux,
    win32: installWindows
  };
  const install = Object.hasOwn(installers, platform) ? installers[platform] : void 0;
  if (!install) {
    throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  await (0, import_promises2.mkdir)(CACHE_DIR, { recursive: true });
  const downloadPath = (0, import_path5.join)(CACHE_DIR, `download-${Date.now()}`);
  try {
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await install(pkg, downloadPath);
  } catch (err) {
    // Best-effort cleanup; never let it mask the original failure.
    await (0, import_promises2.rm)(downloadPath, { force: true }).catch(() => {
    });
    throw err;
  }
}
11364
/**
 * Installs LibreOffice from a downloaded .dmg: mounts the image, copies
 * `LibreOffice.app` into the cache directory, detaches the image and links the
 * binary.
 *
 * Every child process gets an `'error'` listener; without one, a missing tool
 * (spawn ENOENT) raises an unhandled `'error'` event and takes the whole
 * process down. Output is discarded so an unread pipe can never stall a child.
 *
 * @param {{binPath: string}} pkg - package descriptor for darwin.
 * @param {string} downloadPath - path of the downloaded disk image.
 * @returns {Promise<string>} path returned by `createSymlink`.
 * @throws {Error} when mounting or copying fails.
 */
async function installMacOS(pkg, downloadPath) {
  // Runs a tool to completion and rejects with `failureMessage` on spawn failure or non-zero exit.
  const run = (cmd, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "ignore" });
    child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await run("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath], "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328");
  try {
    const appSource = (0, import_path5.join)(mountPoint, "LibreOffice.app");
    const appDest = (0, import_path5.join)(CACHE_DIR, "LibreOffice.app");
    // `cp -R src dest` nests src inside dest when dest already exists, so clear
    // any leftover from an earlier (possibly partial) install first.
    await (0, import_promises2.rm)(appDest, { recursive: true, force: true });
    await run("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Detaching is best-effort: resolve whether it succeeds, fails or cannot be spawned.
    await new Promise((resolve4) => {
      const child = (0, import_child_process4.spawn)("hdiutil", ["detach", mountPoint], { stdio: "ignore" });
      child.once("error", () => resolve4());
      child.once("close", () => resolve4());
    });
  }
  await (0, import_promises2.rm)(downloadPath, { force: true });
  return await createSymlink((0, import_path5.join)(CACHE_DIR, pkg.binPath));
}
11386
/**
 * Installs LibreOffice from the downloaded `.tar.gz` of .deb packages: unpacks
 * the archive, extracts every .deb into the cache directory with `dpkg-deb -x`
 * and links the binary.
 *
 * Failures are no longer swallowed. Previously a missing DEBS directory or a
 * failed `dpkg-deb` run was ignored and the function still reported success
 * with a link to a binary that did not exist.
 *
 * @param {{binPath: string}} pkg - package descriptor for linux.
 * @param {string} downloadPath - path of the downloaded archive.
 * @returns {Promise<string>} path returned by `createSymlink`.
 * @throws {Error} when unpacking fails, no DEBS directory is found, a package
 *   cannot be extracted, or the expected binary is missing afterwards.
 */
async function installLinux(pkg, downloadPath) {
  // Runs a tool to completion; the 'error' listener keeps a missing tool
  // (spawn ENOENT) from crashing the process with an unhandled event.
  const run = (cmd, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "ignore" });
    child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const extractDir = (0, import_path5.join)(CACHE_DIR, `extract-${Date.now()}`);
  await (0, import_promises2.mkdir)(extractDir, { recursive: true });
  try {
    await run("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");
    // NOTE(review): the official archive appears to wrap DEBS in a versioned
    // top-level directory — confirm; both layouts are accepted here.
    const roots = [extractDir];
    for (const entry of await (0, import_promises2.readdir)(extractDir, { withFileTypes: true })) {
      if (entry.isDirectory()) roots.push((0, import_path5.join)(extractDir, entry.name));
    }
    let debsDir = null;
    for (const root of roots) {
      const candidate = (0, import_path5.join)(root, "DEBS");
      try {
        await (0, import_promises2.access)(candidate);
        debsDir = candidate;
        break;
      } catch {
        // Not under this root; keep looking.
      }
    }
    if (debsDir === null) {
      throw new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328: DEBS directory not found");
    }
    const entries = await (0, import_promises2.readdir)(debsDir);
    for (const entry of entries) {
      if (entry.endsWith(".deb")) {
        await run("dpkg-deb", ["-x", (0, import_path5.join)(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
      }
    }
  } finally {
    // The scratch directory is removed on success and on failure alike.
    await (0, import_promises2.rm)(extractDir, { recursive: true, force: true }).catch(() => {
    });
  }
  await (0, import_promises2.rm)(downloadPath, { force: true });
  const actualBin = (0, import_path5.join)(CACHE_DIR, pkg.binPath);
  try {
    await (0, import_promises2.access)(actualBin);
  } catch (cause) {
    throw new Error(`\uC124\uCE58 \uC2E4\uD328: ${pkg.binPath} not found`, { cause });
  }
  return await createSymlink(actualBin);
}
11411
/**
 * Unpacks the downloaded MSI into the cache directory with an administrative
 * install (`msiexec /a ... TARGETDIR=<cache>`), then removes the MSI.
 *
 * The child process gets an `'error'` listener so that a spawn failure rejects
 * the promise instead of raising an unhandled `'error'` event, and its output
 * is discarded so an unread pipe cannot stall it.
 *
 * @param {{binPath: string}} pkg - package descriptor for win32.
 * @param {string} downloadPath - path of the downloaded MSI.
 * @returns {Promise<string>} `CACHE_DIR` joined with `pkg.binPath`.
 * @throws {Error} when msiexec cannot be started or exits with a non-zero code.
 */
async function installWindows(pkg, downloadPath) {
  const failureMessage = "MSI \uC124\uCE58 \uC2E4\uD328";
  await new Promise((resolve4, reject) => {
    const child = (0, import_child_process4.spawn)("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`], { stdio: "ignore" });
    child.once("error", (cause) => reject(new Error(failureMessage, { cause })));
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  await (0, import_promises2.rm)(downloadPath, { force: true });
  return (0, import_path5.join)(CACHE_DIR, pkg.binPath);
}
11419
/**
 * Exposes the installed binary as `<CACHE_DIR>/bin/soffice` and makes sure
 * that directory is on `PATH` for the current process.
 *
 * - A stale link from an earlier install is replaced rather than silently kept.
 * - If the link cannot be created, the real binary path is returned instead of
 *   a link path that does not exist.
 * - The bin directory is added to `PATH` only once, and an unset `PATH` no
 *   longer produces a literal "undefined" entry.
 *
 * @param {string} actualBin - path of the real soffice executable.
 * @returns {Promise<string>} the link path, or `actualBin` when linking failed.
 */
async function createSymlink(actualBin) {
  const binDir = (0, import_path5.join)(CACHE_DIR, "bin");
  await (0, import_promises2.mkdir)(binDir, { recursive: true });
  const linkBin = (0, import_path5.join)(binDir, "soffice");
  let resolved = linkBin;
  try {
    // Removing first lets a reinstall repoint the link (symlink() fails with EEXIST otherwise).
    await (0, import_promises2.rm)(linkBin, { force: true });
    await (0, import_promises2.symlink)(actualBin, linkBin);
  } catch {
    // Linking is best-effort (it may be denied by the OS); fall back to the real binary.
    resolved = actualBin;
  }
  const currentPath = process.env.PATH ?? "";
  if (!currentPath.split(import_path5.delimiter).includes(binDir)) {
    process.env.PATH = currentPath ? `${binDir}${import_path5.delimiter}${currentPath}` : binDir;
  }
  return resolved;
}
11430
/**
 * Entry point of the automatic installer: looks up the package descriptor for
 * the current platform and delegates to `installForPlatform`.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} whatever `installForPlatform` resolves with.
 * @throws {ConvertError} `UNSUPPORTED_PLATFORM` when `PACKAGES` has no entry for `process.platform`.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const descriptor = PACKAGES[platform];
  if (descriptor) {
    return await installForPlatform(descriptor, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
11441
/**
 * Resolves a usable soffice binary, installing LibreOffice when permitted.
 *
 * Lookup order: system PATH, then the kordoc cache, then (when `autoInstall`
 * is true) a fresh automatic install. Progress and outcome are reported
 * through `emitter`.
 *
 * @param {object} emitter - event emitter providing `validate`, `install` and `error`.
 * @param {boolean} [autoInstall=true] - whether a missing LibreOffice may be installed.
 * @returns {Promise<string>} command name or path of the resolved binary.
 * @throws {ConvertError} `SOFFICE_NOT_FOUND` when nothing is found and `autoInstall` is false.
 * @throws {*} any installer error is reported as `install_failed` and re-thrown unchanged.
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
  const inPath = await findInPath();
  if (inPath) {
    emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
    return inPath;
  }
  const inCache = await findInCache();
  if (inCache) {
    emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
    return inCache;
  }
  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }
  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  try {
    const installed = await installLibreOffice((downloaded, total) => {
      // `total` may be only an estimate, so the raw ratio can exceed 1 or be
      // undefined (total <= 0); keep the reported percentage inside 0..100.
      const ratio = total > 0 ? downloaded / total : 0;
      const percent = Number.isFinite(ratio) ? Math.min(100, Math.max(0, Math.round(ratio * 100))) : 0;
      emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
        percent,
        downloadedBytes: downloaded,
        totalBytes: total
      });
    });
    emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
    return installed;
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
    throw err;
  }
}
11480
+
11283
11481
  // src/convert/libreoffice.ts
11284
11482
  var libreConvert = import_libreoffice_convert.default.convert;
11285
11483
  async function assertSofficeAvailable() {
@@ -11316,6 +11514,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
11316
11514
  });
11317
11515
  }
11318
11516
 
11517
+ // src/convert/events.ts
11518
/**
 * Minimal single-listener event channel used to report conversion progress.
 *
 * Delivery is best-effort: a listener that throws, or that returns a promise
 * which later rejects, never affects the conversion itself.
 */
var ConvertEventEmitter = class {
  listener = null;
  /**
   * Registers the listener, replacing any previous one.
   * @param {((event: object) => unknown) | null} listener
   */
  setListener(listener) {
    this.listener = listener;
  }
  /**
   * Delivers an event to the listener, if one is registered.
   * @param {object} event
   */
  emit(event) {
    try {
      const outcome = this.listener?.(event);
      if (typeof outcome?.then === "function") {
        // Async listeners: absorb a later rejection so it cannot surface as an
        // unhandled rejection and terminate the process.
        Promise.resolve(outcome).catch(() => {
        });
      }
    } catch {
      // Listener failures are deliberately ignored (best-effort delivery).
    }
  }
  // In the helpers below `meta` is spread first, so extra payload can never
  // overwrite the event's own `type`, `stage` or `message`.
  /** Emits a `detect` event. */
  detect(stage, message, meta) {
    this.emit({ ...meta, type: "detect", stage, message });
  }
  /** Emits a `validate` event. */
  validate(stage, message, meta) {
    this.emit({ ...meta, type: "validate", stage, message });
  }
  /** Emits an `install` event. */
  install(stage, message, meta) {
    this.emit({ ...meta, type: "install", stage, message });
  }
  /** Emits a `convert` progress event carrying `percent`. */
  progress(percent, message) {
    this.emit({ type: "convert", stage: "convert_progress", message, percent });
  }
  /** Emits the `convert_start` event (percent 0). */
  convertStart(message) {
    this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
  }
  /** Emits the `convert_done` event (percent 100). */
  convertDone(message) {
    this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
  }
  /** Emits the final `complete` event with the result summary. */
  complete(result) {
    this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
  }
  /** Emits an `error` event; every error is currently flagged `recoverable: true`. */
  error(stage, code, message, suggestion) {
    this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
  }
};
11564
+
11319
11565
  // src/convert/index.ts
11320
11566
  var isConverting = false;
11321
11567
  var queue = [];
@@ -11340,81 +11586,129 @@ async function acquireConvertLock() {
11340
11586
  });
11341
11587
  }
11342
11588
  async function convertToPdf(input, options) {
11343
- let buffer;
11344
- try {
11345
- if (typeof input === "string") {
11346
- buffer = await (0, import_promises2.readFile)(input);
11347
- } else if (Buffer.isBuffer(input)) {
11348
- buffer = input;
11349
- } else {
11350
- buffer = Buffer.from(input);
11351
- }
11352
- } catch (err) {
11353
- return {
11354
- success: false,
11355
- code: "PARSE_ERROR",
11356
- error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11357
- stage: "detect"
11358
- };
11359
- }
11360
- const MAX_FILE_SIZE = 500 * 1024 * 1024;
11361
- if (buffer.length > MAX_FILE_SIZE) {
11362
- return {
11363
- success: false,
11364
- code: "FILE_TOO_LARGE",
11365
- error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11366
- stage: "detect"
11367
- };
11589
+ const emitter = new ConvertEventEmitter();
11590
+ if (options?.onEvent) {
11591
+ emitter.setListener(options.onEvent);
11368
11592
  }
11369
- const format = detectFormat(toArrayBuffer(buffer));
11370
- if (format !== "hwp" && format !== "hwpx") {
11371
- return {
11372
- success: false,
11373
- code: "UNSUPPORTED_FORMAT",
11374
- error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11375
- stage: "detect"
11376
- };
11593
+ if (options?.onProgress) {
11594
+ const legacyProgress = options.onProgress;
11595
+ emitter.setListener((event) => {
11596
+ if (event.type === "convert" && event.stage === "convert_progress") {
11597
+ legacyProgress(event.percent, event.message);
11598
+ }
11599
+ });
11377
11600
  }
11378
11601
  try {
11379
- await assertSofficeAvailable();
11380
- } catch (err) {
11381
- if (err instanceof ConvertError) {
11602
+ emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
11603
+ let buffer;
11604
+ try {
11605
+ if (typeof input === "string") {
11606
+ buffer = await (0, import_promises3.readFile)(input);
11607
+ } else if (Buffer.isBuffer(input)) {
11608
+ buffer = input;
11609
+ } else {
11610
+ buffer = Buffer.from(input);
11611
+ }
11612
+ } catch (err) {
11613
+ emitter.error(
11614
+ "detect",
11615
+ "PARSE_ERROR",
11616
+ `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
11617
+ );
11382
11618
  return {
11383
11619
  success: false,
11384
- code: err.code,
11385
- error: err.message,
11386
- stage: "validate"
11620
+ code: "PARSE_ERROR",
11621
+ error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11622
+ stage: "detect"
11387
11623
  };
11388
11624
  }
11389
- throw err;
11390
- }
11391
- const releaseLock = await acquireConvertLock();
11392
- try {
11393
- options?.onProgress?.(10, "convert");
11394
- const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11395
- options?.onProgress?.(100, "done");
11396
- return {
11397
- success: true,
11398
- pdf: new Uint8Array(pdf),
11399
- sourceFormat: format
11400
- };
11401
- } catch (err) {
11402
- if (err instanceof ConvertError) {
11625
+ const MAX_FILE_SIZE = 500 * 1024 * 1024;
11626
+ if (buffer.length > MAX_FILE_SIZE) {
11627
+ emitter.error(
11628
+ "detect",
11629
+ "FILE_TOO_LARGE",
11630
+ `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
11631
+ );
11632
+ return {
11633
+ success: false,
11634
+ code: "FILE_TOO_LARGE",
11635
+ error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11636
+ stage: "detect"
11637
+ };
11638
+ }
11639
+ const format = detectFormat(toArrayBuffer(buffer));
11640
+ emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
11641
+ if (format !== "hwp" && format !== "hwpx") {
11642
+ emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
11403
11643
  return {
11404
11644
  success: false,
11405
- code: err.code,
11406
- error: err.message,
11645
+ code: "UNSUPPORTED_FORMAT",
11646
+ error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11647
+ stage: "detect"
11648
+ };
11649
+ }
11650
+ emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11651
+ let sofficePath;
11652
+ try {
11653
+ sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
11654
+ } catch (err) {
11655
+ if (err instanceof ConvertError) {
11656
+ return {
11657
+ success: false,
11658
+ code: err.code,
11659
+ error: err.message,
11660
+ stage: "validate"
11661
+ };
11662
+ }
11663
+ throw err;
11664
+ }
11665
+ const releaseLock = await acquireConvertLock();
11666
+ try {
11667
+ emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
11668
+ emitter.progress(10, "\uBCC0\uD658 \uC911...");
11669
+ const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11670
+ emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
11671
+ emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
11672
+ const result = {
11673
+ success: true,
11674
+ pdf: new Uint8Array(pdf),
11675
+ sourceFormat: format
11676
+ };
11677
+ emitter.complete({
11678
+ sourceFormat: format,
11679
+ pdfSize: pdf.length
11680
+ });
11681
+ return result;
11682
+ } catch (err) {
11683
+ if (err instanceof ConvertError) {
11684
+ emitter.error("convert", err.code, err.message);
11685
+ return {
11686
+ success: false,
11687
+ code: err.code,
11688
+ error: err.message,
11689
+ stage: "convert"
11690
+ };
11691
+ }
11692
+ const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
11693
+ emitter.error("convert", classifyError(err), errorMsg);
11694
+ return {
11695
+ success: false,
11696
+ code: classifyError(err),
11697
+ error: errorMsg,
11407
11698
  stage: "convert"
11408
11699
  };
11700
+ } finally {
11701
+ releaseLock();
11409
11702
  }
11703
+ } catch (unexpectedErr) {
11704
+ const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
11705
+ emitter.error("convert", "PARSE_ERROR", errorMsg);
11410
11706
  return {
11411
11707
  success: false,
11412
- code: classifyError(err),
11413
- error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
11708
+ code: "PARSE_ERROR",
11709
+ error: errorMsg,
11414
11710
  stage: "convert"
11415
11711
  };
11416
- } finally {
11417
- releaseLock();
11418
11712
  }
11419
11713
  }
11420
11714
  async function convertHwpToPdf(input, options) {
@@ -11539,9 +11833,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11539
11833
  };
11540
11834
 
11541
11835
  // src/pipeline/unified-ocr.ts
11542
- var import_promises3 = require("fs/promises");
11543
- var import_path5 = require("path");
11544
- var import_child_process4 = require("child_process");
11836
+ var import_promises4 = require("fs/promises");
11837
+ var import_path6 = require("path");
11838
+ var import_child_process5 = require("child_process");
11545
11839
  var import_node_perf_hooks = require("perf_hooks");
11546
11840
  init_logger();
11547
11841
 
@@ -11675,15 +11969,15 @@ function elapsedMs(startAt) {
11675
11969
  return Math.round(import_node_perf_hooks.performance.now() - startAt);
11676
11970
  }
11677
11971
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11678
- const absInput = (0, import_path5.resolve)(inputPath);
11679
- const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
11680
- const workspaceDir = (0, import_path5.resolve)(options.workspaceDir ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}_ocr_workspace`));
11681
- const imagesDir = (0, import_path5.join)(workspaceDir, "images");
11682
- const rawDir = (0, import_path5.join)(workspaceDir, "ocr", "raw");
11683
- const diffDir = (0, import_path5.join)(workspaceDir, "ocr", "diff");
11684
- const outputPath = (0, import_path5.resolve)(options.outputPath ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}.md`));
11685
- const reportPath = (0, import_path5.join)(workspaceDir, "run-report.json");
11686
- const modelCachePath = (0, import_path5.join)((0, import_path5.dirname)(absInput), ".kordoc-model-cache.json");
11972
+ const absInput = (0, import_path6.resolve)(inputPath);
11973
+ const stem = (0, import_path6.basename)(absInput, (0, import_path6.extname)(absInput));
11974
+ const workspaceDir = (0, import_path6.resolve)(options.workspaceDir ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}_ocr_workspace`));
11975
+ const imagesDir = (0, import_path6.join)(workspaceDir, "images");
11976
+ const rawDir = (0, import_path6.join)(workspaceDir, "ocr", "raw");
11977
+ const diffDir = (0, import_path6.join)(workspaceDir, "ocr", "diff");
11978
+ const outputPath = (0, import_path6.resolve)(options.outputPath ?? (0, import_path6.join)((0, import_path6.dirname)(absInput), `${stem}.md`));
11979
+ const reportPath = (0, import_path6.join)(workspaceDir, "run-report.json");
11980
+ const modelCachePath = (0, import_path6.join)((0, import_path6.dirname)(absInput), ".kordoc-model-cache.json");
11687
11981
  const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
11688
11982
  const timeoutMs = options.timeoutMs ?? 6e4;
11689
11983
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
@@ -11697,9 +11991,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11697
11991
  const keyPool = ApiKeyRotationPool.fromEnv();
11698
11992
  const runId = options.runId ?? generateRunId("ocr");
11699
11993
  const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
11700
- await (0, import_promises3.mkdir)(imagesDir, { recursive: true });
11701
- await (0, import_promises3.mkdir)(rawDir, { recursive: true });
11702
- await (0, import_promises3.mkdir)(diffDir, { recursive: true });
11994
+ await (0, import_promises4.mkdir)(imagesDir, { recursive: true });
11995
+ await (0, import_promises4.mkdir)(rawDir, { recursive: true });
11996
+ await (0, import_promises4.mkdir)(diffDir, { recursive: true });
11703
11997
  const timingsMs = {};
11704
11998
  const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
11705
11999
  const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
@@ -11715,12 +12009,12 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11715
12009
  currentStage = "convert";
11716
12010
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11717
12011
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
11718
- if ((0, import_path5.extname)(absInput).toLowerCase() !== ".pdf") {
12012
+ if ((0, import_path6.extname)(absInput).toLowerCase() !== ".pdf") {
11719
12013
  await assertSofficeAvailable();
11720
- workingPdfPath = (0, import_path5.join)(workspaceDir, `${stem}.pdf`);
11721
- const inputBuffer = await (0, import_promises3.readFile)(absInput);
12014
+ workingPdfPath = (0, import_path6.join)(workspaceDir, `${stem}.pdf`);
12015
+ const inputBuffer = await (0, import_promises4.readFile)(absInput);
11722
12016
  const out = await convertBuffer(inputBuffer, ".pdf");
11723
- await (0, import_promises3.writeFile)(workingPdfPath, out);
12017
+ await (0, import_promises4.writeFile)(workingPdfPath, out);
11724
12018
  }
11725
12019
  timingsMs.convert = elapsedMs(convertStart);
11726
12020
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
@@ -11731,10 +12025,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11731
12025
  if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
11732
12026
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11733
12027
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
11734
- await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0, import_path5.join)(imagesDir, "page")]);
11735
- const firstFiles = (await (0, import_promises3.readdir)(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
12028
+ await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0, import_path6.join)(imagesDir, "page")]);
12029
+ const firstFiles = (await (0, import_promises4.readdir)(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11736
12030
  if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
11737
- const probeImage = (0, import_path5.join)(imagesDir, firstFiles[0]);
12031
+ const probeImage = (0, import_path6.join)(imagesDir, firstFiles[0]);
11738
12032
  markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
11739
12033
  const probeStart = import_node_perf_hooks.performance.now();
11740
12034
  currentStage = "probe";
@@ -11780,7 +12074,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11780
12074
  try {
11781
12075
  await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
11782
12076
  if (totalPages > 1) {
11783
- for await (const item of renderPdfToPngStream(workingPdfPath, (0, import_path5.join)(imagesDir, "page"), dpi, totalPages, 2)) {
12077
+ for await (const item of renderPdfToPngStream(workingPdfPath, (0, import_path6.join)(imagesDir, "page"), dpi, totalPages, 2)) {
11784
12078
  await queue2.enqueue(item);
11785
12079
  renderDone++;
11786
12080
  markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
@@ -11830,8 +12124,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11830
12124
  const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
11831
12125
  const rawPagePaths = [];
11832
12126
  for (const [pageNum, markdown] of sortedEntries) {
11833
- const pagePath = (0, import_path5.join)(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
11834
- await (0, import_promises3.writeFile)(pagePath, markdown, "utf-8");
12127
+ const pagePath = (0, import_path6.join)(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
12128
+ await (0, import_promises4.writeFile)(pagePath, markdown, "utf-8");
11835
12129
  rawPagePaths.push(pagePath);
11836
12130
  }
11837
12131
  const mergeStart = import_node_perf_hooks.performance.now();
@@ -11839,7 +12133,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11839
12133
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11840
12134
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
11841
12135
  const merged = await mergeMarkdownPages(rawPagePaths);
11842
- await (0, import_promises3.writeFile)(outputPath, merged, "utf-8");
12136
+ await (0, import_promises4.writeFile)(outputPath, merged, "utf-8");
11843
12137
  timingsMs.merge = elapsedMs(mergeStart);
11844
12138
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11845
12139
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
@@ -11855,7 +12149,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11855
12149
  timingsMs,
11856
12150
  modelCachePath
11857
12151
  };
11858
- await (0, import_promises3.writeFile)(reportPath, JSON.stringify(report, null, 2), "utf-8");
12152
+ await (0, import_promises4.writeFile)(reportPath, JSON.stringify(report, null, 2), "utf-8");
11859
12153
  logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
11860
12154
  return { outputPath, reportPath, selectedModel };
11861
12155
  } catch (err) {
@@ -11930,7 +12224,7 @@ async function getPdfPageCount(pdfPath) {
11930
12224
  return n;
11931
12225
  }
11932
12226
  async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, startPage = 1) {
11933
- const imagesDir = (0, import_path5.dirname)(prefixPath);
12227
+ const imagesDir = (0, import_path6.dirname)(prefixPath);
11934
12228
  for (let page = startPage; page <= totalPages; page++) {
11935
12229
  try {
11936
12230
  await runCommand("pdftoppm", [
@@ -11944,9 +12238,9 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11944
12238
  pdfPath,
11945
12239
  prefixPath
11946
12240
  ]);
11947
- const files = await (0, import_promises3.readdir)(imagesDir);
12241
+ const files = await (0, import_promises4.readdir)(imagesDir);
11948
12242
  const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11949
- const imagePath = (0, import_path5.join)(imagesDir, pageFiles[pageFiles.length - 1]);
12243
+ const imagePath = (0, import_path6.join)(imagesDir, pageFiles[pageFiles.length - 1]);
11950
12244
  yield { pageNumber: page, imagePath };
11951
12245
  } catch (err) {
11952
12246
  yield {
@@ -11959,7 +12253,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11959
12253
  }
11960
12254
  async function runCommand(cmd, args) {
11961
12255
  await new Promise((resolvePromise, reject) => {
11962
- const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
12256
+ const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
11963
12257
  let stderr = "";
11964
12258
  child.stderr.on("data", (d) => {
11965
12259
  stderr += String(d);
@@ -11973,7 +12267,7 @@ async function runCommand(cmd, args) {
11973
12267
  }
11974
12268
  async function runCommandWithStdout(cmd, args) {
11975
12269
  return await new Promise((resolvePromise, reject) => {
11976
- const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
12270
+ const child = (0, import_child_process5.spawn)(cmd, args, { stdio: "pipe" });
11977
12271
  let stdout = "";
11978
12272
  let stderr = "";
11979
12273
  child.stdout.on("data", (d) => {
@@ -12062,7 +12356,7 @@ function startParallelProbeRuns(input) {
12062
12356
  }
12063
12357
  async function loadModelCache(path) {
12064
12358
  try {
12065
- const raw = await (0, import_promises3.readFile)(path, "utf-8");
12359
+ const raw = await (0, import_promises4.readFile)(path, "utf-8");
12066
12360
  return JSON.parse(raw);
12067
12361
  } catch {
12068
12362
  return null;
@@ -12093,7 +12387,7 @@ async function updateModelCache(path, probes) {
12093
12387
  }
12094
12388
  }
12095
12389
  current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
12096
- await (0, import_promises3.writeFile)(path, JSON.stringify(current, null, 2), "utf-8");
12390
+ await (0, import_promises4.writeFile)(path, JSON.stringify(current, null, 2), "utf-8");
12097
12391
  }
12098
12392
  async function ocrWorkerPool(input) {
12099
12393
  const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
@@ -12153,7 +12447,7 @@ async function ocrImageWithFallback(input) {
12153
12447
  async function mergeMarkdownPages(paths) {
12154
12448
  const out = [];
12155
12449
  for (let i = 0; i < paths.length; i++) {
12156
- const txt = (await (0, import_promises3.readFile)(paths[i], "utf-8")).trim();
12450
+ const txt = (await (0, import_promises4.readFile)(paths[i], "utf-8")).trim();
12157
12451
  if (!txt) continue;
12158
12452
  out.push(txt);
12159
12453
  }
@@ -12269,7 +12563,7 @@ async function ocrImageViaNim(input) {
12269
12563
  throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
12270
12564
  }
12271
12565
  async function encodeBase64(path) {
12272
- const b = await (0, import_promises3.readFile)(path);
12566
+ const b = await (0, import_promises4.readFile)(path);
12273
12567
  return b.toString("base64");
12274
12568
  }
12275
12569
  function stripCodeFence3(text) {
@@ -12281,7 +12575,7 @@ async function delay(ms) {
12281
12575
  await new Promise((resolvePromise) => setTimeout(resolvePromise, ms));
12282
12576
  }
12283
12577
  function ensureSupportedInput(path) {
12284
- const ext = (0, import_path5.extname)(path).toLowerCase();
12578
+ const ext = (0, import_path6.extname)(path).toLowerCase();
12285
12579
  const allowed = /* @__PURE__ */ new Set([".pdf", ".hwp", ".hwpx", ".docx", ".xlsx"]);
12286
12580
  if (!allowed.has(ext)) {
12287
12581
  throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uC785\uB825 \uD3EC\uB9F7: ${ext}`);
@@ -12308,7 +12602,7 @@ async function parse2(input, options) {
12308
12602
  let buffer;
12309
12603
  if (typeof input === "string") {
12310
12604
  try {
12311
- const buf = await (0, import_promises4.readFile)(input);
12605
+ const buf = await (0, import_promises5.readFile)(input);
12312
12606
  buffer = toArrayBuffer(buf);
12313
12607
  } catch (err) {
12314
12608
  const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;