@clazic/kordoc 2.6.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -134,7 +134,7 @@ var VERSION, KordocError, SAFE_HREF_RE;
134
134
  var init_utils = __esm({
135
135
  "src/utils.ts"() {
136
136
  "use strict";
137
- VERSION = true ? "2.5.2" : "0.0.0-dev";
137
+ VERSION = true ? "2.6.0" : "0.0.0-dev";
138
138
  KordocError = class extends Error {
139
139
  code;
140
140
  stage;
@@ -11255,6 +11255,204 @@ var ConvertError = class extends Error {
11255
11255
  }
11256
11256
  };
11257
11257
 
11258
+ // src/convert/installer.ts
11259
+ import { homedir } from "os";
11260
+ import { join as join4, delimiter } from "path";
11261
+ import { mkdir, access, symlink, rm } from "fs/promises";
11262
+ import { createWriteStream } from "fs";
11263
+ import { spawn as spawn2 } from "child_process";
11264
// Per-user directory that receives the downloaded/extracted LibreOffice build.
var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");
// Path of a "version" marker inside the cache directory.
// NOTE(review): nothing in the visible installer code reads or writes this file yet.
var VERSION_FILE = join4(CACHE_DIR, "version");
/**
 * Download descriptors keyed by `process.platform`.
 *
 * - `url`     : installer archive to download.
 * - `binPath` : location of the `soffice` executable relative to CACHE_DIR
 *               once the archive has been unpacked.
 * - `sizeMb`  : approximate archive size, used only as a progress denominator.
 *
 * The table and its entries are frozen so that no caller can mutate the shared
 * configuration by accident.
 */
var PACKAGES = Object.freeze({
  darwin: Object.freeze({
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300
  }),
  linux: Object.freeze({
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200
  }),
  win32: Object.freeze({
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350
  })
});
11283
/**
 * Checks whether `soffice` can be launched from the system PATH by running
 * `soffice --version` through the shared `runCommand` helper.
 *
 * @returns {Promise<string|null>} the literal command name "soffice" when the
 *   probe succeeds, or null when loading the helper or running it fails.
 */
async function findInPath() {
  const probe = async () => {
    const { runCommand: runCommand2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
    await runCommand2("soffice", ["--version"]);
    return "soffice";
  };
  // Any failure simply means "not available on PATH".
  return probe().catch(() => null);
}
11292
/**
 * Looks for a previously installed LibreOffice inside CACHE_DIR.
 *
 * Two locations are probed, in order:
 *   1. the `bin/soffice` symlink created by `createSymlink` (macOS/Linux), and
 *   2. the platform's real executable at `PACKAGES[platform].binPath`.
 * The second probe matters because the Windows installer returns the real
 * executable without creating the symlink, and because symlink creation is
 * best-effort on the other platforms.
 *
 * @returns {Promise<string|null>} path of the first accessible candidate, or null.
 */
async function findInCache() {
  const candidates = [join4(CACHE_DIR, "bin", "soffice")];
  const platformPkg = PACKAGES[process.platform];
  if (platformPkg) {
    candidates.push(join4(CACHE_DIR, platformPkg.binPath));
  }
  for (const candidate of candidates) {
    try {
      await access(candidate);
      return candidate;
    } catch {
      // Not present (or a dangling link): fall through to the next candidate.
    }
  }
  return null;
}
11301
/**
 * Streams `url` into the file `dest`, reporting progress as bytes arrive.
 *
 * Resolves only after every byte has been flushed to disk, so callers may open
 * `dest` immediately afterwards.
 *
 * @param {string} url - resource to download.
 * @param {string} dest - destination file path (created or truncated).
 * @param {number} totalBytes - estimated size, used as the progress denominator
 *   when the server does not report a usable Content-Length.
 * @param {(downloaded: number, total: number) => void} [onProgress] - called
 *   after each chunk has been handed to the file stream.
 * @throws {Error} on a non-2xx response, a missing body, a network failure or
 *   a file-system write error.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    // Do not save an HTML error page as if it were the installer.
    await response.body?.cancel().catch(() => {});
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
  // Prefer the exact size when the server reports one for an un-encoded body;
  // with a Content-Encoding the header describes the compressed size instead.
  const declaredBytes = response.headers.get("content-encoding") ? NaN : Number(response.headers.get("content-length"));
  const expectedBytes = declaredBytes > 0 ? declaredBytes : totalBytes;
  const file = createWriteStream(dest);
  let writeFailure = null;
  const flushed = new Promise((resolve, reject) => {
    file.once("finish", resolve);
    file.once("error", (err) => {
      writeFailure = err;
      reject(err);
    });
  });
  // The rejection is observed via `await flushed` / `writeFailure` below.
  flushed.catch(() => {});
  // Waits for the stream's internal buffer to empty, cleaning up its listeners.
  const waitForDrain = () => new Promise((resolve, reject) => {
    const onDrain = () => {
      file.off("error", onError);
      resolve();
    };
    const onError = (err) => {
      file.off("drain", onDrain);
      reject(err);
    };
    file.once("drain", onDrain);
    file.once("error", onError);
  });
  const reader = response.body.getReader();
  let downloaded = 0;
  let completed = false;
  try {
    while (true) {
      if (writeFailure) throw writeFailure;
      const { done, value } = await reader.read();
      if (done) break;
      // Respect backpressure so a slow disk cannot buffer the whole archive in memory.
      if (!file.write(value)) {
        await waitForDrain();
      }
      downloaded += value.length;
      onProgress?.(downloaded, expectedBytes);
    }
    file.end();
    await flushed;
    completed = true;
  } finally {
    if (!completed) {
      file.destroy();
      await reader.cancel().catch(() => {});
    }
    reader.releaseLock();
  }
}
11320
/**
 * Downloads the archive described by `pkg` into CACHE_DIR and hands it to the
 * installer matching `process.platform`.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg - download descriptor.
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} path of the installed `soffice` executable (or its symlink).
 * @throws {ConvertError} code "UNSUPPORTED_PLATFORM" when no installer exists for this platform.
 * @throws {Error} any download or installer failure; the temporary download is removed first.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  const installers = new Map([
    ["darwin", installMacOS],
    ["linux", installLinux],
    ["win32", installWindows]
  ]);
  const install = installers.get(platform);
  // Reject before downloading: there is no point fetching hundreds of MB we cannot install.
  if (!install) {
    throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  await mkdir(CACHE_DIR, { recursive: true });
  const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
  try {
    // The download is inside the try so a failed/partial download is cleaned up too.
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await install(pkg, downloadPath);
  } catch (err) {
    // Cleanup must never mask the original failure.
    await rm(downloadPath, { force: true }).catch(() => {});
    throw err;
  }
}
11339
/**
 * Installs LibreOffice on macOS: mounts the downloaded .dmg with `hdiutil`,
 * copies `LibreOffice.app` into CACHE_DIR, detaches the image, deletes the
 * download and exposes the binary through `createSymlink`.
 *
 * @param {{binPath: string}} pkg - descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .dmg.
 * @returns {Promise<string>} value returned by `createSymlink`.
 * @throws {Error} when mounting or copying fails, or a tool cannot be spawned.
 */
async function installMacOS(pkg, downloadPath) {
  // Runs a command to completion. stdio is ignored so an unread pipe can never
  // stall the child, and "error" is handled so a missing tool rejects instead
  // of raising an unhandled EventEmitter error.
  const run = (cmd, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = spawn2(cmd, args, { stdio: "ignore" });
    child.once("error", reject);
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await run("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath], "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328");
  try {
    const appSource = join4(mountPoint, "LibreOffice.app");
    const appDest = join4(CACHE_DIR, "LibreOffice.app");
    // `cp -R src dest` copies *into* dest when dest already exists, which would
    // nest the bundle; start from a clean destination.
    await rm(appDest, { recursive: true, force: true });
    await run("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Detaching is best-effort; its outcome never changes the install result.
    await run("hdiutil", ["detach", mountPoint], "hdiutil detach").catch(() => {});
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(join4(CACHE_DIR, pkg.binPath));
}
11361
/**
 * Installs LibreOffice on Linux without root: unpacks the downloaded .tar.gz,
 * extracts every .deb found in its `DEBS` directory into CACHE_DIR with
 * `dpkg-deb -x`, then exposes the binary through `createSymlink`.
 *
 * @param {{binPath: string}} pkg - descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .tar.gz.
 * @returns {Promise<string>} value returned by `createSymlink`.
 * @throws {Error} when extraction fails, no `DEBS` directory is found, a tool
 *   cannot be spawned, or the expected binary is missing afterwards.
 */
async function installLinux(pkg, downloadPath) {
  // Runs a command to completion; see the "error" handler: a missing tool must
  // reject rather than raise an unhandled EventEmitter error.
  const run = (cmd, args, failureMessage) => new Promise((resolve4, reject) => {
    const child = spawn2(cmd, args, { stdio: "ignore" });
    child.once("error", reject);
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error(failureMessage)));
  });
  const { readdir: listDir } = await import("fs/promises");
  const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
  await mkdir(extractDir, { recursive: true });
  try {
    await run("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");
    // NOTE(review): the official archive appears to wrap everything in a
    // versioned top-level folder, so look for DEBS both at the root and one
    // level down - confirm against the actual tarball layout.
    const candidates = [join4(extractDir, "DEBS")];
    for (const entry of await listDir(extractDir)) {
      candidates.push(join4(extractDir, entry, "DEBS"));
    }
    let debsDir = null;
    for (const candidate of candidates) {
      try {
        await access(candidate);
        debsDir = candidate;
        break;
      } catch {
        // Candidate does not exist; try the next one.
      }
    }
    if (!debsDir) {
      throw new Error("DEBS \uB514\uB809\uD130\uB9AC\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
    }
    const debFiles = (await listDir(debsDir)).filter((entry) => entry.endsWith(".deb"));
    for (const entry of debFiles) {
      await run("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
    }
  } finally {
    // Always drop the scratch directory, including on failure.
    await rm(extractDir, { recursive: true, force: true });
  }
  const actualBin = join4(CACHE_DIR, pkg.binPath);
  try {
    // Fail loudly instead of returning a path to a binary that was never extracted.
    await access(actualBin);
  } catch {
    throw new Error(`soffice \uC2E4\uD589 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${actualBin}`);
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(actualBin);
}
11386
/**
 * Installs LibreOffice on Windows by running `msiexec /a` (administrative
 * install) with CACHE_DIR as TARGETDIR, then deletes the downloaded MSI.
 *
 * @param {{binPath: string}} pkg - descriptor; `binPath` is relative to CACHE_DIR.
 * @param {string} downloadPath - path of the downloaded .msi.
 * @returns {Promise<string>} path of `soffice.exe` inside CACHE_DIR (no symlink is created).
 * @throws {Error} when msiexec exits non-zero or cannot be spawned.
 */
async function installWindows(pkg, downloadPath) {
  await new Promise((resolve4, reject) => {
    // stdio is ignored so unread pipes cannot stall the child.
    const child = spawn2("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`], { stdio: "ignore" });
    // Without this handler a spawn failure would surface as an unhandled "error" event.
    child.once("error", reject);
    child.once("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
  });
  await rm(downloadPath, { force: true });
  return join4(CACHE_DIR, pkg.binPath);
}
11394
/**
 * Publishes the installed binary as `<CACHE_DIR>/bin/soffice` and makes sure
 * that directory is on this process's PATH.
 *
 * @param {string} actualBin - path of the real `soffice` executable.
 * @returns {Promise<string>} the symlink path, or `actualBin` itself when the
 *   link could not be created (so the caller never receives a dead path).
 */
async function createSymlink(actualBin) {
  const binDir = join4(CACHE_DIR, "bin");
  await mkdir(binDir, { recursive: true });
  const linkBin = join4(binDir, "soffice");
  let resolvedBin = linkBin;
  try {
    // Replace any link left by an earlier install so it cannot point at a stale target.
    await rm(linkBin, { force: true });
    await symlink(actualBin, linkBin);
  } catch {
    // Linking is best-effort (e.g. no permission): fall back to the real binary.
    resolvedBin = actualBin;
  }
  const currentPath = process.env.PATH;
  if (!currentPath) {
    // Avoid producing "<binDir>:undefined" when PATH is unset.
    process.env.PATH = binDir;
  } else if (!currentPath.split(delimiter).includes(binDir)) {
    // Prepend only once, however many times an install runs in this process.
    process.env.PATH = `${binDir}${delimiter}${currentPath}`;
  }
  return resolvedBin;
}
11405
/**
 * Entry point of the automatic installer: picks the download descriptor for
 * the current `process.platform` and delegates to `installForPlatform`.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress] - download progress callback.
 * @returns {Promise<string>} whatever `installForPlatform` resolves to.
 * @throws {ConvertError} code "UNSUPPORTED_PLATFORM" when PACKAGES has no entry for this platform.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const pkg = PACKAGES[platform];
  if (pkg) {
    return await installForPlatform(pkg, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
11416
/**
 * Resolves a usable `soffice`, trying in order: the system PATH, the local
 * cache, and (when allowed) an automatic download + install. Every step is
 * reported through `emitter`.
 *
 * @param {object} emitter - event sink exposing `validate`, `install` and `error`.
 * @param {boolean} [autoInstall=true] - whether a missing LibreOffice may be installed.
 * @returns {Promise<string>} command name or path of the resolved `soffice`.
 * @throws {ConvertError} code "SOFFICE_NOT_FOUND" when nothing is found and
 *   `autoInstall` is false.
 * @throws {Error} any installer failure, rethrown after an "install_failed" event.
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
  const inPath = await findInPath();
  if (inPath) {
    emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
    return inPath;
  }
  const inCache = await findInCache();
  if (inCache) {
    emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
    return inCache;
  }
  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }
  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  try {
    const installed = await installLibreOffice((downloaded, total) => {
      // `total` may be an estimate, so keep the figure inside 0..100 and never
      // divide by a zero/negative total.
      const percent = total > 0 ? Math.min(100, Math.round(downloaded / total * 100)) : 0;
      emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
        percent,
        downloadedBytes: downloaded,
        totalBytes: total
      });
    });
    emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
    return installed;
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
    throw err;
  }
}
11455
+
11258
11456
  // src/convert/libreoffice.ts
11259
11457
  var libreConvert = libre.convert;
11260
11458
  async function assertSofficeAvailable() {
@@ -11291,6 +11489,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
11291
11489
  });
11292
11490
  }
11293
11491
 
11492
+ // src/convert/events.ts
11493
/**
 * Minimal single-listener event channel used to report conversion progress.
 *
 * Every helper builds an event object and forwards it to the registered
 * listener. Listener exceptions are deliberately swallowed so that a faulty
 * observer can never break the conversion itself.
 */
var ConvertEventEmitter = class {
  listener = null;
  /**
   * Registers the listener, replacing any previously registered one.
   * @param {((event: object) => void) | null} listener
   */
  setListener(listener) {
    this.listener = listener;
  }
  /**
   * Delivers `event` to the listener, if any. Never throws.
   * @param {object} event
   */
  emit(event) {
    try {
      this.listener?.(event);
    } catch {
      // Observer errors are intentionally ignored (reporting is best-effort).
    }
  }
  // In the helpers below `meta` is spread FIRST so that extra fields can never
  // overwrite the `type` / `stage` / `message` fields consumers dispatch on.
  /** Emits a "detect" event with optional extra fields from `meta`. */
  detect(stage, message, meta) {
    this.emit({ ...meta, type: "detect", stage, message });
  }
  /** Emits a "validate" event with optional extra fields from `meta`. */
  validate(stage, message, meta) {
    this.emit({ ...meta, type: "validate", stage, message });
  }
  /** Emits an "install" event with optional extra fields from `meta`. */
  install(stage, message, meta) {
    this.emit({ ...meta, type: "install", stage, message });
  }
  /** Emits a "convert" progress event carrying `percent`. */
  progress(percent, message) {
    this.emit({ type: "convert", stage: "convert_progress", message, percent });
  }
  /** Emits the "convert_start" event (percent 0). */
  convertStart(message) {
    this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
  }
  /** Emits the "convert_done" event (percent 100). */
  convertDone(message) {
    this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
  }
  /** Emits the final "complete" event carrying `result`. */
  complete(result) {
    this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
  }
  /**
   * Emits an "error" event.
   * @param {string} stage - pipeline stage in which the error occurred.
   * @param {string} code - machine-readable error code.
   * @param {string} message - human-readable description.
   * @param {string} [suggestion] - optional hint for the user.
   * @param {boolean} [recoverable=true] - whether the caller may retry.
   */
  error(stage, code, message, suggestion, recoverable = true) {
    this.emit({ type: "error", stage, code, message, recoverable, suggestion });
  }
};
11539
+
11294
11540
  // src/convert/index.ts
11295
11541
  var isConverting = false;
11296
11542
  var queue = [];
@@ -11315,81 +11561,129 @@ async function acquireConvertLock() {
11315
11561
  });
11316
11562
  }
11317
11563
  async function convertToPdf(input, options) {
11318
- let buffer;
11319
- try {
11320
- if (typeof input === "string") {
11321
- buffer = await readFile(input);
11322
- } else if (Buffer.isBuffer(input)) {
11323
- buffer = input;
11324
- } else {
11325
- buffer = Buffer.from(input);
11326
- }
11327
- } catch (err) {
11328
- return {
11329
- success: false,
11330
- code: "PARSE_ERROR",
11331
- error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11332
- stage: "detect"
11333
- };
11334
- }
11335
- const MAX_FILE_SIZE = 500 * 1024 * 1024;
11336
- if (buffer.length > MAX_FILE_SIZE) {
11337
- return {
11338
- success: false,
11339
- code: "FILE_TOO_LARGE",
11340
- error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11341
- stage: "detect"
11342
- };
11564
+ const emitter = new ConvertEventEmitter();
11565
+ if (options?.onEvent) {
11566
+ emitter.setListener(options.onEvent);
11343
11567
  }
11344
- const format = detectFormat(toArrayBuffer(buffer));
11345
- if (format !== "hwp" && format !== "hwpx") {
11346
- return {
11347
- success: false,
11348
- code: "UNSUPPORTED_FORMAT",
11349
- error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11350
- stage: "detect"
11351
- };
11568
+ if (options?.onProgress) {
11569
+ const legacyProgress = options.onProgress;
11570
+ emitter.setListener((event) => {
11571
+ if (event.type === "convert" && event.stage === "convert_progress") {
11572
+ legacyProgress(event.percent, event.message);
11573
+ }
11574
+ });
11352
11575
  }
11353
11576
  try {
11354
- await assertSofficeAvailable();
11355
- } catch (err) {
11356
- if (err instanceof ConvertError) {
11577
+ emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
11578
+ let buffer;
11579
+ try {
11580
+ if (typeof input === "string") {
11581
+ buffer = await readFile(input);
11582
+ } else if (Buffer.isBuffer(input)) {
11583
+ buffer = input;
11584
+ } else {
11585
+ buffer = Buffer.from(input);
11586
+ }
11587
+ } catch (err) {
11588
+ emitter.error(
11589
+ "detect",
11590
+ "PARSE_ERROR",
11591
+ `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
11592
+ );
11357
11593
  return {
11358
11594
  success: false,
11359
- code: err.code,
11360
- error: err.message,
11361
- stage: "validate"
11595
+ code: "PARSE_ERROR",
11596
+ error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11597
+ stage: "detect"
11362
11598
  };
11363
11599
  }
11364
- throw err;
11365
- }
11366
- const releaseLock = await acquireConvertLock();
11367
- try {
11368
- options?.onProgress?.(10, "convert");
11369
- const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11370
- options?.onProgress?.(100, "done");
11371
- return {
11372
- success: true,
11373
- pdf: new Uint8Array(pdf),
11374
- sourceFormat: format
11375
- };
11376
- } catch (err) {
11377
- if (err instanceof ConvertError) {
11600
+ const MAX_FILE_SIZE = 500 * 1024 * 1024;
11601
+ if (buffer.length > MAX_FILE_SIZE) {
11602
+ emitter.error(
11603
+ "detect",
11604
+ "FILE_TOO_LARGE",
11605
+ `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
11606
+ );
11607
+ return {
11608
+ success: false,
11609
+ code: "FILE_TOO_LARGE",
11610
+ error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11611
+ stage: "detect"
11612
+ };
11613
+ }
11614
+ const format = detectFormat(toArrayBuffer(buffer));
11615
+ emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
11616
+ if (format !== "hwp" && format !== "hwpx") {
11617
+ emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
11378
11618
  return {
11379
11619
  success: false,
11380
- code: err.code,
11381
- error: err.message,
11620
+ code: "UNSUPPORTED_FORMAT",
11621
+ error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11622
+ stage: "detect"
11623
+ };
11624
+ }
11625
+ emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11626
+ let sofficePath;
11627
+ try {
11628
+ sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
11629
+ } catch (err) {
11630
+ if (err instanceof ConvertError) {
11631
+ return {
11632
+ success: false,
11633
+ code: err.code,
11634
+ error: err.message,
11635
+ stage: "validate"
11636
+ };
11637
+ }
11638
+ throw err;
11639
+ }
11640
+ const releaseLock = await acquireConvertLock();
11641
+ try {
11642
+ emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
11643
+ emitter.progress(10, "\uBCC0\uD658 \uC911...");
11644
+ const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11645
+ emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
11646
+ emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
11647
+ const result = {
11648
+ success: true,
11649
+ pdf: new Uint8Array(pdf),
11650
+ sourceFormat: format
11651
+ };
11652
+ emitter.complete({
11653
+ sourceFormat: format,
11654
+ pdfSize: pdf.length
11655
+ });
11656
+ return result;
11657
+ } catch (err) {
11658
+ if (err instanceof ConvertError) {
11659
+ emitter.error("convert", err.code, err.message);
11660
+ return {
11661
+ success: false,
11662
+ code: err.code,
11663
+ error: err.message,
11664
+ stage: "convert"
11665
+ };
11666
+ }
11667
+ const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
11668
+ emitter.error("convert", classifyError(err), errorMsg);
11669
+ return {
11670
+ success: false,
11671
+ code: classifyError(err),
11672
+ error: errorMsg,
11382
11673
  stage: "convert"
11383
11674
  };
11675
+ } finally {
11676
+ releaseLock();
11384
11677
  }
11678
+ } catch (unexpectedErr) {
11679
+ const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
11680
+ emitter.error("convert", "PARSE_ERROR", errorMsg);
11385
11681
  return {
11386
11682
  success: false,
11387
- code: classifyError(err),
11388
- error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
11683
+ code: "PARSE_ERROR",
11684
+ error: errorMsg,
11389
11685
  stage: "convert"
11390
11686
  };
11391
- } finally {
11392
- releaseLock();
11393
11687
  }
11394
11688
  }
11395
11689
  async function convertHwpToPdf(input, options) {
@@ -11514,9 +11808,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11514
11808
  };
11515
11809
 
11516
11810
  // src/pipeline/unified-ocr.ts
11517
- import { mkdir, readdir, readFile as readFile2, stat, writeFile } from "fs/promises";
11518
- import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11519
- import { spawn as spawn2 } from "child_process";
11811
+ import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
11812
+ import { basename as basename2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
11813
+ import { spawn as spawn3 } from "child_process";
11520
11814
  import { performance } from "perf_hooks";
11521
11815
  init_logger();
11522
11816
 
@@ -11652,13 +11946,13 @@ function elapsedMs(startAt) {
11652
11946
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11653
11947
  const absInput = resolve3(inputPath);
11654
11948
  const stem = basename2(absInput, extname(absInput));
11655
- const workspaceDir = resolve3(options.workspaceDir ?? join4(dirname3(absInput), `${stem}_ocr_workspace`));
11656
- const imagesDir = join4(workspaceDir, "images");
11657
- const rawDir = join4(workspaceDir, "ocr", "raw");
11658
- const diffDir = join4(workspaceDir, "ocr", "diff");
11659
- const outputPath = resolve3(options.outputPath ?? join4(dirname3(absInput), `${stem}.md`));
11660
- const reportPath = join4(workspaceDir, "run-report.json");
11661
- const modelCachePath = join4(dirname3(absInput), ".kordoc-model-cache.json");
11949
+ const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
11950
+ const imagesDir = join5(workspaceDir, "images");
11951
+ const rawDir = join5(workspaceDir, "ocr", "raw");
11952
+ const diffDir = join5(workspaceDir, "ocr", "diff");
11953
+ const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
11954
+ const reportPath = join5(workspaceDir, "run-report.json");
11955
+ const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
11662
11956
  const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
11663
11957
  const timeoutMs = options.timeoutMs ?? 6e4;
11664
11958
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
@@ -11672,9 +11966,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11672
11966
  const keyPool = ApiKeyRotationPool.fromEnv();
11673
11967
  const runId = options.runId ?? generateRunId("ocr");
11674
11968
  const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
11675
- await mkdir(imagesDir, { recursive: true });
11676
- await mkdir(rawDir, { recursive: true });
11677
- await mkdir(diffDir, { recursive: true });
11969
+ await mkdir2(imagesDir, { recursive: true });
11970
+ await mkdir2(rawDir, { recursive: true });
11971
+ await mkdir2(diffDir, { recursive: true });
11678
11972
  const timingsMs = {};
11679
11973
  const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
11680
11974
  const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
@@ -11692,10 +11986,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11692
11986
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
11693
11987
  if (extname(absInput).toLowerCase() !== ".pdf") {
11694
11988
  await assertSofficeAvailable();
11695
- workingPdfPath = join4(workspaceDir, `${stem}.pdf`);
11989
+ workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
11696
11990
  const inputBuffer = await readFile2(absInput);
11697
11991
  const out = await convertBuffer(inputBuffer, ".pdf");
11698
- await writeFile(workingPdfPath, out);
11992
+ await writeFile2(workingPdfPath, out);
11699
11993
  }
11700
11994
  timingsMs.convert = elapsedMs(convertStart);
11701
11995
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
@@ -11706,10 +12000,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11706
12000
  if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
11707
12001
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11708
12002
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
11709
- await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join4(imagesDir, "page")]);
12003
+ await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
11710
12004
  const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11711
12005
  if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
11712
- const probeImage = join4(imagesDir, firstFiles[0]);
12006
+ const probeImage = join5(imagesDir, firstFiles[0]);
11713
12007
  markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
11714
12008
  const probeStart = performance.now();
11715
12009
  currentStage = "probe";
@@ -11755,7 +12049,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11755
12049
  try {
11756
12050
  await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
11757
12051
  if (totalPages > 1) {
11758
- for await (const item of renderPdfToPngStream(workingPdfPath, join4(imagesDir, "page"), dpi, totalPages, 2)) {
12052
+ for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
11759
12053
  await queue2.enqueue(item);
11760
12054
  renderDone++;
11761
12055
  markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
@@ -11805,8 +12099,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11805
12099
  const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
11806
12100
  const rawPagePaths = [];
11807
12101
  for (const [pageNum, markdown] of sortedEntries) {
11808
- const pagePath = join4(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
11809
- await writeFile(pagePath, markdown, "utf-8");
12102
+ const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
12103
+ await writeFile2(pagePath, markdown, "utf-8");
11810
12104
  rawPagePaths.push(pagePath);
11811
12105
  }
11812
12106
  const mergeStart = performance.now();
@@ -11814,7 +12108,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11814
12108
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11815
12109
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
11816
12110
  const merged = await mergeMarkdownPages(rawPagePaths);
11817
- await writeFile(outputPath, merged, "utf-8");
12111
+ await writeFile2(outputPath, merged, "utf-8");
11818
12112
  timingsMs.merge = elapsedMs(mergeStart);
11819
12113
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11820
12114
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
@@ -11830,7 +12124,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11830
12124
  timingsMs,
11831
12125
  modelCachePath
11832
12126
  };
11833
- await writeFile(reportPath, JSON.stringify(report, null, 2), "utf-8");
12127
+ await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
11834
12128
  logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
11835
12129
  return { outputPath, reportPath, selectedModel };
11836
12130
  } catch (err) {
@@ -11921,7 +12215,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11921
12215
  ]);
11922
12216
  const files = await readdir(imagesDir);
11923
12217
  const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11924
- const imagePath = join4(imagesDir, pageFiles[pageFiles.length - 1]);
12218
+ const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
11925
12219
  yield { pageNumber: page, imagePath };
11926
12220
  } catch (err) {
11927
12221
  yield {
@@ -11934,7 +12228,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11934
12228
  }
11935
12229
  async function runCommand(cmd, args) {
11936
12230
  await new Promise((resolvePromise, reject) => {
11937
- const child = spawn2(cmd, args, { stdio: "pipe" });
12231
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11938
12232
  let stderr = "";
11939
12233
  child.stderr.on("data", (d) => {
11940
12234
  stderr += String(d);
@@ -11948,7 +12242,7 @@ async function runCommand(cmd, args) {
11948
12242
  }
11949
12243
  async function runCommandWithStdout(cmd, args) {
11950
12244
  return await new Promise((resolvePromise, reject) => {
11951
- const child = spawn2(cmd, args, { stdio: "pipe" });
12245
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11952
12246
  let stdout = "";
11953
12247
  let stderr = "";
11954
12248
  child.stdout.on("data", (d) => {
@@ -12068,7 +12362,7 @@ async function updateModelCache(path, probes) {
12068
12362
  }
12069
12363
  }
12070
12364
  current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
12071
- await writeFile(path, JSON.stringify(current, null, 2), "utf-8");
12365
+ await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
12072
12366
  }
12073
12367
  async function ocrWorkerPool(input) {
12074
12368
  const { queue: queue2, workerCount, ocrInput, onPageDone } = input;