@clazic/kordoc 2.5.2 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -37,6 +37,118 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
37
37
  mod
38
38
  ));
39
39
 
40
+ // src/utils.ts
41
+ var utils_exports = {};
42
+ __export(utils_exports, {
43
+ KordocError: () => KordocError,
44
+ VERSION: () => VERSION,
45
+ classifyError: () => classifyError,
46
+ isPathTraversal: () => isPathTraversal,
47
+ normalizeKordocError: () => normalizeKordocError,
48
+ precheckZipSize: () => precheckZipSize,
49
+ sanitizeError: () => sanitizeError,
50
+ sanitizeHref: () => sanitizeHref,
51
+ toArrayBuffer: () => toArrayBuffer
52
+ });
53
/**
 * Return an ArrayBuffer that holds exactly the bytes visible through `buf`.
 *
 * When the view spans its entire backing buffer, that backing buffer is
 * returned as-is (no copy); otherwise the viewed range is copied out.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf
 *   A Buffer / typed-array style view.
 * @returns {ArrayBuffer} The backing buffer itself or a copy of the viewed range.
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const spansWholeBacking = start === 0 && size === backing.byteLength;
  return spansWholeBacking ? backing : backing.slice(start, start + size);
}
59
/**
 * Produce a message that is safe to show to callers.
 *
 * Only messages of KordocError instances are passed through; anything else is
 * replaced by a fixed generic Korean message ("an error occurred while
 * processing the document").
 *
 * @param {unknown} err The caught value.
 * @returns {string} The KordocError message or the generic fallback.
 */
function sanitizeError(err) {
  const isKnownError = err instanceof KordocError;
  return isKnownError
    ? err.message
    : "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
}
63
/**
 * Conservative check for archive entry names that could escape the target
 * directory.
 *
 * Flags names containing a NUL byte, any ".." sequence (even inside a file
 * name), a leading slash or backslash, or a drive-letter prefix such as "C:".
 *
 * @param {string} name Entry name as stored in the archive.
 * @returns {boolean} true when the name must be rejected.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) return true;
  // Treat backslashes as separators so Windows-style names get the same checks.
  const unixStyle = name.replace(/\\/g, "/");
  if (unixStyle.includes("..")) return true;
  if (unixStyle.startsWith("/")) return true;
  return /^[A-Za-z]:/.test(unixStyle);
}
68
/**
 * Inspect a ZIP's central directory and reject archives that declare too many
 * entries or too much uncompressed data, without inflating anything.
 *
 * The check is fail-open for anything it cannot parse: a missing end-of-central-
 * directory record, a central directory that points outside the buffer, or any
 * non-Kordoc exception yields a zeroed / partial result instead of an error.
 *
 * @param {ArrayBuffer} buffer Raw archive bytes.
 * @param {number} [maxUncompressedSize] Limit for the summed declared sizes (bytes).
 * @param {number} [maxEntries] Limit for the declared entry count.
 * @returns {{totalUncompressed: number, entryCount: number}}
 * @throws {KordocError} When the entry count or the summed size exceeds its limit.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 0x06054b50; // "PK\5\6"
  const CD_ENTRY_SIGNATURE = 0x02014b50; // "PK\1\2"
  const EOCD_MIN_SIZE = 22;
  const EOCD_SEARCH_WINDOW = 65557; // 22-byte record + maximum 65535-byte comment
  const CD_ENTRY_FIXED_SIZE = 46;
  try {
    const view = new DataView(buffer);
    const total = buffer.byteLength;

    // Scan backwards from the last position where the record could start.
    const lowestCandidate = Math.max(0, total - EOCD_SEARCH_WINDOW);
    let eocd = -1;
    let probe = total - EOCD_MIN_SIZE;
    while (probe >= lowestCandidate) {
      if (view.getUint32(probe, true) === EOCD_SIGNATURE) {
        eocd = probe;
        break;
      }
      probe -= 1;
    }
    if (eocd < 0) return { totalUncompressed: 0, entryCount: 0 };

    const entryCount = view.getUint16(eocd + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }

    const cdSize = view.getUint32(eocd + 12, true);
    const cdOffset = view.getUint32(eocd + 16, true);
    const cdEnd = cdOffset + cdSize;
    if (cdEnd > total) return { totalUncompressed: 0, entryCount };

    // Walk the central directory, summing each entry's declared uncompressed size.
    let totalUncompressed = 0;
    let cursor = cdOffset;
    let visited = 0;
    while (visited < entryCount && cursor + CD_ENTRY_FIXED_SIZE <= cdEnd) {
      if (view.getUint32(cursor, true) !== CD_ENTRY_SIGNATURE) break;
      totalUncompressed += view.getUint32(cursor + 24, true);
      const nameLen = view.getUint16(cursor + 28, true);
      const extraLen = view.getUint16(cursor + 30, true);
      const commentLen = view.getUint16(cursor + 32, true);
      cursor += CD_ENTRY_FIXED_SIZE + nameLen + extraLen + commentLen;
      visited += 1;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations propagate; parsing problems are deliberately ignored.
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
106
/**
 * Accept a link target only when it matches SAFE_HREF_RE.
 *
 * @param {string} href Raw link target.
 * @returns {string|null} The trimmed href, or null when it is empty or does
 *   not match the allow-list pattern.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  const isAllowed = candidate !== "" && SAFE_HREF_RE.test(candidate);
  return isAllowed ? candidate : null;
}
111
/**
 * Map an error to a coarse error code by looking for known phrases in its
 * message. Rules are evaluated top to bottom and the first match wins, so the
 * ZIP-specific rule must stay ahead of the generic decompression-bomb rule.
 *
 * @param {unknown} err The caught value.
 * @returns {string} One of the codes below; "PARSE_ERROR" for non-Error values
 *   and for messages that match no rule.
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const mentions = (...phrases) => phrases.some((phrase) => text.includes(phrase));
  const rules = [
    ["ENCRYPTED", () => mentions("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => mentions("DRM")],
    ["ZIP_BOMB", () => mentions("ZIP bomb", "ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC", "ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => mentions("bomb", "\uD06C\uAE30 \uCD08\uACFC", "\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => mentions("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => mentions("\uC139\uC158") && mentions("\uCC3E\uC744 \uC218 \uC5C6", "\uC5C6\uC74C")],
    ["CORRUPTED", () => mentions("\uC2DC\uADF8\uB2C8\uCC98", "\uBCF5\uAD6C\uD560 \uC218 \uC5C6")],
  ];
  const matched = rules.find(([, applies]) => applies());
  return matched ? matched[0] : "PARSE_ERROR";
}
123
/**
 * Coerce any thrown value into a KordocError carrying a code and a stage.
 *
 * An existing KordocError is returned as the same instance (mutated in place),
 * with `stage` / `code` filled in only where they are currently falsy. Other
 * Error instances keep their message and get a code from classifyError();
 * non-Error values use the fallback message and fallback code.
 *
 * @param {unknown} err The caught value.
 * @param {string} fallbackMessage Message used when `err` provides none.
 * @param {string} [stage] Stage recorded on the resulting error.
 * @param {string} [fallbackCode] Code used when none can be derived.
 * @returns {KordocError}
 */
function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
  if (!(err instanceof KordocError)) {
    const isError = err instanceof Error;
    const message = isError ? err.message : fallbackMessage;
    const code = isError ? classifyError(err) : fallbackCode;
    return new KordocError(message || fallbackMessage, { code, stage });
  }
  err.stage ||= stage;
  err.code ||= fallbackCode;
  return err;
}
133
// Module-level bindings for src/utils.ts; they stay undefined until
// init_utils() has been called.
var VERSION, KordocError, SAFE_HREF_RE;
// Initializer for src/utils.ts, wrapped by the bundler's __esm helper
// (presumably so the body runs only once — confirm against the helper).
var init_utils = __esm({
  "src/utils.ts"() {
    "use strict";
    // Allow-list of link prefixes: http(s), mailto, tel and in-page anchors.
    SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
    // NOTE(review): the enclosing diff is titled "2.5.2 → 2.6.1" while this
    // build-time constant says 2.6.0 — confirm the version stamp is current.
    VERSION = true ? "2.6.0" : "0.0.0-dev";
    /**
     * Library error carrying an optional machine-readable `code` and the
     * pipeline `stage` in which it occurred.
     */
    KordocError = class extends Error {
      code;
      stage;
      /**
       * @param {string} message Human-readable message.
       * @param {{code?: string, stage?: string}} [opts] Optional classification.
       */
      constructor(message, opts = {}) {
        super(message);
        const { code, stage } = opts;
        Object.assign(this, { name: "KordocError", code, stage });
      }
    };
  }
});
151
+
40
152
  // src/page-range.ts
41
153
  var page_range_exports = {};
42
154
  __export(page_range_exports, {
@@ -3059,7 +3171,7 @@ var init_provider = __esm({
3059
3171
  });
3060
3172
 
3061
3173
  // src/index.ts
3062
- import { readFile as readFile2 } from "fs/promises";
3174
+ import { readFile as readFile3 } from "fs/promises";
3063
3175
 
3064
3176
  // src/detect.ts
3065
3177
  import JSZip from "jszip";
@@ -3111,97 +3223,8 @@ async function detectZipFormat(buffer) {
3111
3223
  import JSZip2 from "jszip";
3112
3224
  import { DOMParser } from "@xmldom/xmldom";
3113
3225
 
3114
- // src/utils.ts
3115
- var VERSION = true ? "2.5.1" : "0.0.0-dev";
3116
- function toArrayBuffer(buf) {
3117
- if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3118
- return buf.buffer;
3119
- }
3120
- return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
3121
- }
3122
- var KordocError = class extends Error {
3123
- code;
3124
- stage;
3125
- constructor(message, opts = {}) {
3126
- super(message);
3127
- this.name = "KordocError";
3128
- this.code = opts.code;
3129
- this.stage = opts.stage;
3130
- }
3131
- };
3132
- function isPathTraversal(name) {
3133
- if (name.includes("\0")) return true;
3134
- const normalized = name.replace(/\\/g, "/");
3135
- return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
3136
- }
3137
- function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
3138
- try {
3139
- const data = new DataView(buffer);
3140
- const len = buffer.byteLength;
3141
- let eocdOffset = -1;
3142
- for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
3143
- if (data.getUint32(i, true) === 101010256) {
3144
- eocdOffset = i;
3145
- break;
3146
- }
3147
- }
3148
- if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
3149
- const entryCount = data.getUint16(eocdOffset + 10, true);
3150
- if (entryCount > maxEntries) {
3151
- throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
3152
- }
3153
- const cdSize = data.getUint32(eocdOffset + 12, true);
3154
- const cdOffset = data.getUint32(eocdOffset + 16, true);
3155
- if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
3156
- let totalUncompressed = 0;
3157
- let pos = cdOffset;
3158
- for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
3159
- if (data.getUint32(pos, true) !== 33639248) break;
3160
- totalUncompressed += data.getUint32(pos + 24, true);
3161
- const nameLen = data.getUint16(pos + 28, true);
3162
- const extraLen = data.getUint16(pos + 30, true);
3163
- const commentLen = data.getUint16(pos + 32, true);
3164
- pos += 46 + nameLen + extraLen + commentLen;
3165
- }
3166
- if (totalUncompressed > maxUncompressedSize) {
3167
- throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
3168
- }
3169
- return { totalUncompressed, entryCount };
3170
- } catch (err) {
3171
- if (err instanceof KordocError) throw err;
3172
- return { totalUncompressed: 0, entryCount: 0 };
3173
- }
3174
- }
3175
- var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
3176
- function sanitizeHref(href) {
3177
- const trimmed = href.trim();
3178
- if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
3179
- return trimmed;
3180
- }
3181
- function classifyError(err) {
3182
- if (!(err instanceof Error)) return "PARSE_ERROR";
3183
- const msg = err.message;
3184
- if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
3185
- if (msg.includes("DRM")) return "DRM_PROTECTED";
3186
- if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
3187
- if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
3188
- if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
3189
- if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
3190
- if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
3191
- return "PARSE_ERROR";
3192
- }
3193
- function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
3194
- if (err instanceof KordocError) {
3195
- if (!err.stage) err.stage = stage;
3196
- if (!err.code) err.code = fallbackCode;
3197
- return err;
3198
- }
3199
- const message = err instanceof Error ? err.message : fallbackMessage;
3200
- const code = err instanceof Error ? classifyError(err) : fallbackCode;
3201
- return new KordocError(message || fallbackMessage, { code, stage });
3202
- }
3203
-
3204
3226
  // src/table/builder.ts
3227
+ init_utils();
3205
3228
  var MAX_COLS = 200;
3206
3229
  var MAX_ROWS = 1e4;
3207
3230
  function buildTable(rows) {
@@ -3461,6 +3484,8 @@ var HEADING_RATIO_H2 = 1.3;
3461
3484
  var HEADING_RATIO_H3 = 1.15;
3462
3485
 
3463
3486
  // src/hwpx/parser.ts
3487
+ init_utils();
3488
+ init_utils();
3464
3489
  init_page_range();
3465
3490
  init_logger();
3466
3491
  var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
@@ -4302,6 +4327,7 @@ function extractTextFromNode(node) {
4302
4327
  }
4303
4328
 
4304
4329
  // src/hwp5/record.ts
4330
+ init_utils();
4305
4331
  import { inflateRawSync, inflateSync } from "zlib";
4306
4332
  var TAG_PARA_HEADER = 66;
4307
4333
  var TAG_PARA_TEXT = 67;
@@ -5352,6 +5378,7 @@ function parseLenientCfb(data) {
5352
5378
  }
5353
5379
 
5354
5380
  // src/hwp5/parser.ts
5381
+ init_utils();
5355
5382
  init_page_range();
5356
5383
  init_logger();
5357
5384
  var CFB = __toESM(require_cfb(), 1);
@@ -6007,6 +6034,7 @@ function arrangeCells(rows, cols, cells) {
6007
6034
  }
6008
6035
 
6009
6036
  // src/pdf/parser.ts
6037
+ init_utils();
6010
6038
  init_page_range();
6011
6039
  import { createRequire } from "module";
6012
6040
  import { dirname as dirname2, join as join3, resolve as resolve2 } from "path";
@@ -7898,6 +7926,7 @@ function mergeKoreanLines(text) {
7898
7926
  }
7899
7927
 
7900
7928
  // src/xlsx/parser.ts
7929
+ init_utils();
7901
7930
  import JSZip3 from "jszip";
7902
7931
  import { DOMParser as DOMParser2 } from "@xmldom/xmldom";
7903
7932
  init_logger();
@@ -8226,6 +8255,7 @@ async function parseXlsxDocument(buffer, options, existingZip) {
8226
8255
  }
8227
8256
 
8228
8257
  // src/docx/parser.ts
8258
+ init_utils();
8229
8259
  import JSZip4 from "jszip";
8230
8260
  import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
8231
8261
  init_logger();
@@ -8707,6 +8737,7 @@ async function parseDocxDocument(buffer, options, existingZip) {
8707
8737
  }
8708
8738
 
8709
8739
  // src/index.ts
8740
+ init_utils();
8710
8741
  init_cli_provider();
8711
8742
  init_markdown_to_blocks();
8712
8743
  init_logger();
@@ -11208,6 +11239,481 @@ async function markdownToXlsx(markdown, options) {
11208
11239
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
11209
11240
  }
11210
11241
 
11242
+ // src/convert/index.ts
11243
+ import { readFile } from "fs/promises";
11244
+ init_utils();
11245
+
11246
+ // src/convert/libreoffice.ts
11247
+ import libre from "libreoffice-convert";
11248
+
11249
+ // src/convert/error.ts
11250
/**
 * Error raised by the PDF conversion helpers. `code` is a machine-readable
 * identifier (values used in this file include "TIMEOUT", "CONVERT_FAILED",
 * "SOFFICE_NOT_FOUND" and "UNSUPPORTED_PLATFORM").
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code Machine-readable error code.
   * @param {string} message Human-readable message.
   */
  constructor(code, message) {
    super(message);
    Object.assign(this, { code, name: "ConvertError" });
  }
};
11257
+
11258
+ // src/convert/installer.ts
11259
+ import { homedir } from "os";
11260
+ import { join as join4, delimiter } from "path";
11261
+ import { mkdir, access, symlink, rm } from "fs/promises";
11262
+ import { createWriteStream } from "fs";
11263
+ import { spawn as spawn2 } from "child_process";
11264
// Per-user directory that holds the downloaded LibreOffice installation.
var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");
// Path of a version marker inside the cache directory.
var VERSION_FILE = join4(CACHE_DIR, "version");
/**
 * Download descriptors keyed by `process.platform`.
 *  - url:     installer/archive to download
 *  - binPath: location of the soffice binary relative to CACHE_DIR after install
 *  - sizeMb:  approximate download size, used only as the progress total
 *
 * NOTE(review): every URL is pinned to 24.8.4 under ".../stable/" and to
 * x86-64 builds — confirm the links remain valid once that release leaves the
 * stable channel, and whether other CPU architectures need their own entries.
 */
var PACKAGES = {
  darwin: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
    binPath: "LibreOffice.app/Contents/MacOS/soffice",
    sizeMb: 300,
  },
  linux: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
    binPath: "opt/libreoffice24.8/program/soffice",
    sizeMb: 200,
  },
  win32: {
    url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
    binPath: "LibreOffice/program/soffice.exe",
    sizeMb: 350,
  },
};
11283
/**
 * Check whether a `soffice` executable can be launched via the system PATH.
 *
 * The probe runs `soffice --version` directly with child_process.spawn. The
 * previous implementation looked up a `runCommand` helper on the utils module,
 * which that module does not export, so the probe always failed and an
 * installed LibreOffice was never detected.
 *
 * @returns {Promise<"soffice"|null>} "soffice" when the command exits with
 *   status 0, otherwise null (not found, not executable, or non-zero exit).
 */
async function findInPath() {
  return new Promise((resolve) => {
    let child;
    try {
      // Output is irrelevant; ignoring stdio keeps the child from blocking on a full pipe.
      child = spawn2("soffice", ["--version"], { stdio: "ignore" });
    } catch {
      resolve(null);
      return;
    }
    // 'error' fires when the binary cannot be spawned (e.g. ENOENT); without a
    // listener it would surface as an uncaught exception.
    child.once("error", () => resolve(null));
    child.once("close", (code) => resolve(code === 0 ? "soffice" : null));
  });
}
11292
/**
 * Look for a previously installed LibreOffice inside CACHE_DIR.
 *
 * Two locations are probed, in order:
 *  1. CACHE_DIR/bin/soffice — the link written by createSymlink();
 *  2. CACHE_DIR/<binPath> for the current platform's PACKAGES entry — the
 *     binary itself. The Windows installer returns this path without creating
 *     the bin/ link, so probing only (1) meant a Windows install was never
 *     found again; it also covers a missing or dangling link elsewhere.
 *
 * @returns {Promise<string|null>} First accessible path, or null when neither exists.
 */
async function findInCache() {
  const candidates = [join4(CACHE_DIR, "bin", "soffice")];
  const platformPkg = PACKAGES[process.platform];
  if (platformPkg) {
    candidates.push(join4(CACHE_DIR, platformPkg.binPath));
  }
  for (const candidate of candidates) {
    try {
      await access(candidate);
      return candidate;
    } catch {
      // Not accessible — fall through to the next candidate.
    }
  }
  return null;
}
11301
/**
 * Stream `url` into the file at `dest`, reporting progress after each chunk.
 *
 * Compared with the previous version this:
 *  - rejects non-2xx responses instead of saving an error page as the installer;
 *  - honours write backpressure so a slow disk does not buffer the whole
 *    download in memory;
 *  - resolves only after the file stream has finished, so callers can read the
 *    file immediately;
 *  - removes the partial file and cancels the download on failure.
 *
 * @param {string} url Resource to download.
 * @param {string} dest Destination file path.
 * @param {number} totalBytes Expected size, forwarded unchanged to `onProgress`.
 * @param {(downloaded: number, total: number) => void} [onProgress]
 *   Called after each written chunk with the running byte count.
 * @returns {Promise<void>}
 * @throws {Error} On a non-OK HTTP status, a missing body, or any network/disk error.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status}`);
  }
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");

  const file = createWriteStream(dest);
  // Settles when the stream has flushed everything ('finish') or failed ('error').
  const fileDone = new Promise((resolve, reject) => {
    file.once("error", reject);
    file.once("finish", resolve);
  });
  // The rejection is observed through the awaits below; this only prevents an
  // unhandled-rejection report if the stream fails before anyone is awaiting.
  fileDone.catch(() => {});

  const reader = response.body.getReader();
  let downloaded = 0;
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!file.write(value)) {
        // Internal buffer is full: wait for 'drain', or bail out if the stream errors.
        await Promise.race([
          new Promise((resume) => file.once("drain", resume)),
          fileDone,
        ]);
      }
      downloaded += value.length;
      onProgress?.(downloaded, totalBytes);
    }
    file.end();
    await fileDone;
  } catch (err) {
    file.destroy();
    await reader.cancel().catch(() => {});
    await rm(dest, { force: true });
    throw err;
  } finally {
    reader.releaseLock();
  }
}
11320
/**
 * Download the package described by `pkg` into CACHE_DIR and run the installer
 * routine for the current platform.
 *
 * If the installer routine fails, the downloaded file is removed and the
 * original error is rethrown.
 *
 * @param {{url: string, binPath: string, sizeMb: number}} pkg Download descriptor.
 * @param {(downloaded: number, total: number) => void} [onProgress] Download progress callback.
 * @returns {Promise<string>} Path of the installed soffice binary (or link).
 * @throws {ConvertError} "UNSUPPORTED_PLATFORM" when no installer routine exists
 *   for `process.platform` (raised after the download has completed).
 */
async function installForPlatform(pkg, onProgress) {
  const { platform } = process;
  await mkdir(CACHE_DIR, { recursive: true });
  const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
  const expectedBytes = pkg.sizeMb * 1024 * 1024;
  await downloadWithProgress(pkg.url, downloadPath, expectedBytes, onProgress);

  const installers = new Map([
    ["darwin", installMacOS],
    ["linux", installLinux],
    ["win32", installWindows],
  ]);
  const install = installers.get(platform);
  if (!install) {
    throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  try {
    return await install(pkg, downloadPath);
  } catch (err) {
    await rm(downloadPath, { force: true });
    throw err;
  }
}
11339
/**
 * Run one external installer command and resolve when it exits with status 0.
 *
 * A listener for the child's 'error' event is always attached: without it a
 * missing tool (ENOENT) is emitted as an unhandled 'error' event and crashes
 * the process instead of rejecting. stdio is ignored so a chatty tool cannot
 * stall on a full, never-read pipe.
 *
 * @param {string} command Executable name.
 * @param {string[]} args Command arguments.
 * @param {string} failureMessage Message of the Error used for any failure.
 * @returns {Promise<void>}
 */
function runInstallerStep(command, args, failureMessage) {
  return new Promise((resolve, reject) => {
    const child = spawn2(command, args, { stdio: "ignore" });
    child.once("error", (err) => reject(new Error(failureMessage, { cause: err })));
    child.once("close", (code) => {
      if (code === 0) resolve();
      else reject(new Error(failureMessage));
    });
  });
}

/**
 * Find the directory holding the .deb packages inside an extracted archive.
 * Looks for `DEBS` directly under `extractDir` and one level down, since the
 * archive may wrap its contents in a single top-level directory.
 *
 * @param {string} extractDir Directory the archive was extracted into.
 * @returns {Promise<string>} Path of the DEBS directory.
 * @throws {Error} When no DEBS directory is found.
 */
async function locateDebsDir(extractDir) {
  const { readdir } = await import("fs/promises");
  const candidates = [join4(extractDir, "DEBS")];
  for (const entry of await readdir(extractDir, { withFileTypes: true })) {
    if (entry.isDirectory()) candidates.push(join4(extractDir, entry.name, "DEBS"));
  }
  for (const candidate of candidates) {
    try {
      await access(candidate);
      return candidate;
    } catch {
      // Not here — try the next candidate.
    }
  }
  throw new Error("DEBS \uB514\uB809\uD130\uB9AC\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
}

/**
 * macOS install: mount the downloaded .dmg, copy LibreOffice.app into
 * CACHE_DIR, detach the image (always, even when the copy fails), delete the
 * download and link the binary.
 *
 * @param {{binPath: string}} pkg Package descriptor.
 * @param {string} downloadPath Path of the downloaded .dmg.
 * @returns {Promise<string>} Path of the soffice link created by createSymlink().
 */
async function installMacOS(pkg, downloadPath) {
  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await runInstallerStep(
    "hdiutil",
    ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath],
    "dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328"
  );
  try {
    const appSource = join4(mountPoint, "LibreOffice.app");
    const appDest = join4(CACHE_DIR, "LibreOffice.app");
    await runInstallerStep("cp", ["-R", appSource, appDest], ".app \uBCF5\uC0AC \uC2E4\uD328");
  } finally {
    // Best effort: a failed detach must not mask the copy result.
    await runInstallerStep("hdiutil", ["detach", mountPoint], "hdiutil detach").catch(() => {});
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(join4(CACHE_DIR, pkg.binPath));
}

/**
 * Linux install: unpack the .tar.gz, extract every .deb it contains into
 * CACHE_DIR with `dpkg-deb -x`, verify the soffice binary exists, then link it.
 *
 * Extraction problems now reject instead of being swallowed; previously any
 * failure (missing DEBS directory, failing dpkg-deb) was ignored and a link to
 * a non-existent binary was reported as a successful install. The temporary
 * extract directory is removed on both success and failure.
 *
 * @param {{binPath: string}} pkg Package descriptor.
 * @param {string} downloadPath Path of the downloaded archive.
 * @returns {Promise<string>} Path of the soffice link created by createSymlink().
 * @throws {Error} When unpacking fails, no .deb is found, or the binary is missing.
 */
async function installLinux(pkg, downloadPath) {
  const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
  await mkdir(extractDir, { recursive: true });
  try {
    await runInstallerStep("tar", ["xzf", downloadPath, "-C", extractDir], "\uC555\uCD95 \uD574\uC81C \uC2E4\uD328");
    const debsDir = await locateDebsDir(extractDir);
    const { readdir } = await import("fs/promises");
    const debs = (await readdir(debsDir)).filter((entry) => entry.endsWith(".deb"));
    if (debs.length === 0) {
      throw new Error(".deb \uCD94\uCD9C \uC2E4\uD328");
    }
    for (const entry of debs) {
      await runInstallerStep("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR], `${entry} \uCD94\uCD9C \uC2E4\uD328`);
    }
  } finally {
    await rm(extractDir, { recursive: true, force: true });
  }
  const actualBin = join4(CACHE_DIR, pkg.binPath);
  try {
    await access(actualBin);
  } catch (err) {
    throw new Error(`${pkg.binPath} \uCD94\uCD9C \uC2E4\uD328`, { cause: err });
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(actualBin);
}

/**
 * Windows install: run an administrative install (`msiexec /a`) of the
 * downloaded MSI into CACHE_DIR and delete the download.
 *
 * @param {{binPath: string}} pkg Package descriptor.
 * @param {string} downloadPath Path of the downloaded MSI.
 * @returns {Promise<string>} Path of soffice.exe inside CACHE_DIR (no link is created).
 */
async function installWindows(pkg, downloadPath) {
  await runInstallerStep(
    "msiexec",
    ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`],
    "MSI \uC124\uCE58 \uC2E4\uD328"
  );
  await rm(downloadPath, { force: true });
  return join4(CACHE_DIR, pkg.binPath);
}
11394
/**
 * Expose the installed binary as CACHE_DIR/bin/soffice and put that directory
 * at the front of this process's PATH.
 *
 * Creating the link is best effort: any failure (for example, the link
 * already exists) is ignored and the link path is returned regardless.
 *
 * @param {string} actualBin Path of the real soffice binary.
 * @returns {Promise<string>} Path of the link inside CACHE_DIR/bin.
 */
async function createSymlink(actualBin) {
  const binDir = join4(CACHE_DIR, "bin");
  const linkBin = join4(binDir, "soffice");
  await mkdir(binDir, { recursive: true });
  await symlink(actualBin, linkBin).catch(() => {});
  process.env.PATH = `${binDir}${delimiter}${process.env.PATH}`;
  return linkBin;
}
11405
/**
 * Install LibreOffice for the current platform using its PACKAGES entry.
 *
 * @param {(downloaded: number, total: number) => void} [onProgress] Download progress callback.
 * @returns {Promise<string>} Path returned by installForPlatform().
 * @throws {ConvertError} "UNSUPPORTED_PLATFORM" when PACKAGES has no entry for
 *   `process.platform`.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const pkg = PACKAGES[platform];
  if (pkg) {
    return await installForPlatform(pkg, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
11416
/**
 * Locate a usable soffice binary, installing LibreOffice when permitted.
 *
 * Lookup order: system PATH, then the kordoc cache. When neither has it and
 * `autoInstall` is false, an error event is emitted and a ConvertError is
 * thrown; otherwise LibreOffice is installed, with install events (start,
 * download progress, completion or failure) reported through `emitter`.
 *
 * @param {ConvertEventEmitter} emitter Receives validate/install/error events.
 * @param {boolean} [autoInstall] Whether a missing LibreOffice may be installed.
 * @returns {Promise<string>} Command name or path of soffice.
 * @throws {ConvertError} "SOFFICE_NOT_FOUND" when missing and `autoInstall` is false.
 * @throws Whatever installLibreOffice() rejects with (after an install_failed event).
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");

  // Each lookup is only attempted when the previous one found nothing.
  const lookups = [
    [findInPath, "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC"],
    [findInCache, "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC"],
  ];
  for (const [locate, foundMessage] of lookups) {
    const sofficePath = await locate();
    if (sofficePath) {
      emitter.validate("soffice_found", foundMessage, { sofficePath });
      return sofficePath;
    }
  }

  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }

  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  const reportDownload = (downloaded, total) => {
    const percent = Math.round(downloaded / total * 100);
    emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
      percent,
      downloadedBytes: downloaded,
      totalBytes: total
    });
  };
  try {
    const installed = await installLibreOffice(reportDownload);
    emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
    return installed;
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
    throw err;
  }
}
11455
+
11456
+ // src/convert/libreoffice.ts
11457
+ var libreConvert = libre.convert;
11458
/**
 * Ensure `soffice` can be launched from PATH.
 *
 * Runs `soffice --version` directly with child_process.spawn. The previous
 * implementation destructured a `runCommand` helper from the utils module,
 * which that module does not export, so this check always failed with
 * SOFFICE_NOT_FOUND even when LibreOffice was installed.
 *
 * @returns {Promise<void>} Resolves when the command exits with status 0.
 * @throws {ConvertError} "SOFFICE_NOT_FOUND" when the command cannot be
 *   started or exits with a non-zero status.
 */
async function assertSofficeAvailable() {
  const exitedCleanly = await new Promise((resolve) => {
    let child;
    try {
      child = spawn2("soffice", ["--version"], { stdio: "ignore" });
    } catch {
      resolve(false);
      return;
    }
    // 'error' covers spawn failures such as ENOENT; it must be handled or it
    // is raised as an uncaught exception.
    child.once("error", () => resolve(false));
    child.once("close", (code) => resolve(code === 0));
  });
  if (!exitedCleanly) {
    throw new ConvertError(
      "SOFFICE_NOT_FOUND",
      "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
    );
  }
}
11469
/**
 * Convert a document buffer with LibreOffice, bounded by a timeout.
 *
 * The timeout only rejects the returned promise; the call made through
 * `libreConvert` is not cancelled here.
 *
 * @param {Buffer} buffer Source document bytes.
 * @param {string} targetExt Target extension, e.g. ".pdf".
 * @param {number} [timeoutMs] Milliseconds to wait before rejecting.
 * @returns {Promise<Buffer>} Converted document.
 * @throws {ConvertError} "TIMEOUT" when the limit elapses first; "CONVERT_FAILED"
 *   when the converter reports an error or yields no output.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
  return new Promise((resolve, reject) => {
    const onTimeout = () => {
      reject(new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`));
    };
    const timer = setTimeout(onTimeout, timeoutMs);

    const onConverted = (err, done) => {
      clearTimeout(timer);
      if (!err && done) {
        resolve(done);
        return;
      }
      const reason = err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328";
      reject(new ConvertError("CONVERT_FAILED", reason));
    };
    libreConvert(buffer, targetExt, void 0, onConverted);
  });
}
11491
+
11492
+ // src/convert/events.ts
11493
/**
 * Single-listener event channel used by the conversion pipeline. Exceptions
 * thrown by the listener are swallowed so reporting never breaks a conversion.
 */
var ConvertEventEmitter = class {
  listener = null;

  /** Register the (single) event listener, replacing any previous one. */
  setListener(listener) {
    this.listener = listener;
  }

  /** Deliver an event to the listener, if one is set; listener errors are ignored. */
  emit(event) {
    if (this.listener == null) return;
    try {
      this.listener(event);
    } catch {
    }
  }

  /** Build and emit a staged event; keys in `meta` override the base fields. */
  #emitStaged(type, stage, message, meta) {
    this.emit({ type, stage, message, ...meta });
  }

  /** Helper: "detect" event. */
  detect(stage, message, meta) {
    this.#emitStaged("detect", stage, message, meta);
  }

  /** Helper: "validate" event. */
  validate(stage, message, meta) {
    this.#emitStaged("validate", stage, message, meta);
  }

  /** Helper: "install" event. */
  install(stage, message, meta) {
    this.#emitStaged("install", stage, message, meta);
  }

  /** Helper: conversion progress event. */
  progress(percent, message) {
    this.emit({ type: "convert", stage: "convert_progress", message, percent });
  }

  /** Helper: conversion started (percent 0). */
  convertStart(message) {
    this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
  }

  /** Helper: conversion finished (percent 100). */
  convertDone(message) {
    this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
  }

  /** Helper: overall completion event carrying the result summary. */
  complete(result) {
    this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
  }

  /** Helper: error event; always flagged as recoverable. */
  error(stage, code, message, suggestion) {
    this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
  }
};
11539
+
11540
+ // src/convert/index.ts
11541
// True while some caller holds the conversion lock.
var isConverting = false;
// FIFO of callbacks; each one hands the lock to a waiting caller.
var queue = [];
/**
 * Acquire the process-wide conversion lock so only one conversion runs at a
 * time. Callers are served in arrival order.
 *
 * @returns {Promise<() => void>} Resolves with a release function once the
 *   lock is held; calling it passes the lock to the next waiter, if any.
 */
async function acquireConvertLock() {
  const release = () => {
    isConverting = false;
    queue.shift()?.();
  };
  if (isConverting) {
    return new Promise((grant) => {
      queue.push(() => {
        // Re-mark the lock as held synchronously, before the waiter resumes.
        isConverting = true;
        grant(release);
      });
    });
  }
  isConverting = true;
  return release;
}
11563
/**
 * Convert an HWP / HWPX document to PDF through LibreOffice.
 *
 * Never rejects: every failure is reported as `{ success: false, code, error,
 * stage }` and mirrored as an "error" event.
 *
 * Changes from the previous version:
 *  - `onEvent` and `onProgress` can be used together. Previously registering
 *    `onProgress` replaced the `onEvent` listener, so `onEvent` silently
 *    stopped receiving events.
 *  - the "soffice_check" validate event is emitted once (by resolveSoffice)
 *    instead of twice.
 *
 * @param {string|Buffer|ArrayBuffer|Uint8Array} input File path or document bytes.
 * @param {object} [options]
 * @param {(event: object) => void} [options.onEvent] Receives every pipeline event.
 * @param {(percent: number, message: string) => void} [options.onProgress]
 *   Legacy callback; receives only "convert_progress" events.
 * @param {boolean} [options.autoInstallLibreOffice] Install LibreOffice when missing (default true).
 * @param {number} [options.timeoutMs] Conversion timeout forwarded to convertBuffer().
 * @returns {Promise<object>} `{ success: true, pdf: Uint8Array, sourceFormat }`
 *   or a failure object as described above.
 */
async function convertToPdf(input, options) {
  const emitter = new ConvertEventEmitter();
  const onEvent = options?.onEvent;
  const onProgress = options?.onProgress;
  if (onEvent || onProgress) {
    emitter.setListener((event) => {
      // Isolate the two callbacks: a throwing onEvent must not starve onProgress.
      try {
        onEvent?.(event);
      } catch {
      }
      if (onProgress && event.type === "convert" && event.stage === "convert_progress") {
        onProgress(event.percent, event.message);
      }
    });
  }
  const failure = (stage, code, error) => ({ success: false, code, error, stage });

  try {
    emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
    let buffer;
    try {
      if (typeof input === "string") {
        buffer = await readFile(input);
      } else if (Buffer.isBuffer(input)) {
        buffer = input;
      } else {
        buffer = Buffer.from(input);
      }
    } catch (err) {
      const message = `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`;
      emitter.error("detect", "PARSE_ERROR", message);
      return failure("detect", "PARSE_ERROR", message);
    }

    const MAX_FILE_SIZE = 500 * 1024 * 1024;
    if (buffer.length > MAX_FILE_SIZE) {
      const message = `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`;
      emitter.error("detect", "FILE_TOO_LARGE", message);
      return failure("detect", "FILE_TOO_LARGE", message);
    }

    const format = detectFormat(toArrayBuffer(buffer));
    emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
    if (format !== "hwp" && format !== "hwpx") {
      const message = `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`;
      emitter.error("detect", "UNSUPPORTED_FORMAT", message);
      return failure("detect", "UNSUPPORTED_FORMAT", message);
    }

    // resolveSoffice() emits the "soffice_check" event itself.
    // NOTE(review): the resolved path is not passed on to convertBuffer();
    // conversion presumably relies on the converter locating soffice on its
    // own (createSymlink() prepends the cache bin directory to PATH) — confirm.
    try {
      await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
    } catch (err) {
      if (err instanceof ConvertError) {
        return failure("validate", err.code, err.message);
      }
      // Anything else is reported by the outer handler below.
      throw err;
    }

    const releaseLock = await acquireConvertLock();
    try {
      emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
      emitter.progress(10, "\uBCC0\uD658 \uC911...");
      const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
      emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
      emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
      const result = {
        success: true,
        pdf: new Uint8Array(pdf),
        sourceFormat: format
      };
      emitter.complete({
        sourceFormat: format,
        pdfSize: pdf.length
      });
      return result;
    } catch (err) {
      if (err instanceof ConvertError) {
        emitter.error("convert", err.code, err.message);
        return failure("convert", err.code, err.message);
      }
      const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
      const code = classifyError(err);
      emitter.error("convert", code, errorMsg);
      return failure("convert", code, errorMsg);
    } finally {
      releaseLock();
    }
  } catch (unexpectedErr) {
    const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
    emitter.error("convert", "PARSE_ERROR", errorMsg);
    return failure("convert", "PARSE_ERROR", errorMsg);
  }
}
11689
/**
 * Convert to PDF, accepting only HWP 5.x input.
 *
 * The format check runs on the result of convertToPdf(), i.e. after the
 * conversion has already been performed; a successful conversion of any other
 * detected format is replaced by an UNSUPPORTED_FORMAT failure.
 *
 * @param {string|Buffer|ArrayBuffer|Uint8Array} input File path or document bytes.
 * @param {object} [options] Forwarded unchanged to convertToPdf().
 * @returns {Promise<object>} The convertToPdf() result, or the format failure.
 */
async function convertHwpToPdf(input, options) {
  const result = await convertToPdf(input, options);
  const isOtherFormat = result.success && result.sourceFormat !== "hwp";
  if (!isOtherFormat) return result;
  return {
    success: false,
    code: "UNSUPPORTED_FORMAT",
    error: `HWP 5.x \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
    stage: "detect"
  };
}
11701
/**
 * Convert to PDF, accepting only HWPX input.
 *
 * The format check runs on the result of convertToPdf(), i.e. after the
 * conversion has already been performed; a successful conversion of any other
 * detected format is replaced by an UNSUPPORTED_FORMAT failure.
 *
 * @param {string|Buffer|ArrayBuffer|Uint8Array} input File path or document bytes.
 * @param {object} [options] Forwarded unchanged to convertToPdf().
 * @returns {Promise<object>} The convertToPdf() result, or the format failure.
 */
async function convertHwpxToPdf(input, options) {
  const result = await convertToPdf(input, options);
  const isOtherFormat = result.success && result.sourceFormat !== "hwpx";
  if (!isOtherFormat) return result;
  return {
    success: false,
    code: "UNSUPPORTED_FORMAT",
    error: `HWPX \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
    stage: "detect"
  };
}
11713
+
11714
+ // src/index.ts
11715
+ init_utils();
11716
+
11211
11717
  // src/ocr/api-key-rotation.ts
11212
11718
  var AllKeysCoolingDownError = class extends Error {
11213
11719
  waitMs;
@@ -11302,11 +11808,10 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11302
11808
  };
11303
11809
 
11304
11810
  // src/pipeline/unified-ocr.ts
11305
- import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
11306
- import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11307
- import { spawn as spawn2 } from "child_process";
11811
+ import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
11812
+ import { basename as basename2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
11813
+ import { spawn as spawn3 } from "child_process";
11308
11814
  import { performance } from "perf_hooks";
11309
- import libre from "libreoffice-convert";
11310
11815
  init_logger();
11311
11816
 
11312
11817
  // src/pipeline/bounded-queue.ts
@@ -11368,7 +11873,6 @@ var BoundedQueue = class {
11368
11873
  };
11369
11874
 
11370
11875
  // src/pipeline/unified-ocr.ts
11371
- var libreConvert = libre.convert;
11372
11876
  var UnifiedOcrError = class extends Error {
11373
11877
  code;
11374
11878
  stage;
@@ -11442,13 +11946,13 @@ function elapsedMs(startAt) {
11442
11946
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11443
11947
  const absInput = resolve3(inputPath);
11444
11948
  const stem = basename2(absInput, extname(absInput));
11445
- const workspaceDir = resolve3(options.workspaceDir ?? join4(dirname3(absInput), `${stem}_ocr_workspace`));
11446
- const imagesDir = join4(workspaceDir, "images");
11447
- const rawDir = join4(workspaceDir, "ocr", "raw");
11448
- const diffDir = join4(workspaceDir, "ocr", "diff");
11449
- const outputPath = resolve3(options.outputPath ?? join4(dirname3(absInput), `${stem}.md`));
11450
- const reportPath = join4(workspaceDir, "run-report.json");
11451
- const modelCachePath = join4(dirname3(absInput), ".kordoc-model-cache.json");
11949
+ const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
11950
+ const imagesDir = join5(workspaceDir, "images");
11951
+ const rawDir = join5(workspaceDir, "ocr", "raw");
11952
+ const diffDir = join5(workspaceDir, "ocr", "diff");
11953
+ const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
11954
+ const reportPath = join5(workspaceDir, "run-report.json");
11955
+ const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
11452
11956
  const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
11453
11957
  const timeoutMs = options.timeoutMs ?? 6e4;
11454
11958
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
@@ -11462,9 +11966,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11462
11966
  const keyPool = ApiKeyRotationPool.fromEnv();
11463
11967
  const runId = options.runId ?? generateRunId("ocr");
11464
11968
  const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
11465
- await mkdir(imagesDir, { recursive: true });
11466
- await mkdir(rawDir, { recursive: true });
11467
- await mkdir(diffDir, { recursive: true });
11969
+ await mkdir2(imagesDir, { recursive: true });
11970
+ await mkdir2(rawDir, { recursive: true });
11971
+ await mkdir2(diffDir, { recursive: true });
11468
11972
  const timingsMs = {};
11469
11973
  const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
11470
11974
  const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
@@ -11482,10 +11986,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11482
11986
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
11483
11987
  if (extname(absInput).toLowerCase() !== ".pdf") {
11484
11988
  await assertSofficeAvailable();
11485
- workingPdfPath = join4(workspaceDir, `${stem}.pdf`);
11486
- const inputBuffer = await readFile(absInput);
11487
- const out = await convertWithLibreOffice(inputBuffer, ".pdf");
11488
- await writeFile(workingPdfPath, out);
11989
+ workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
11990
+ const inputBuffer = await readFile2(absInput);
11991
+ const out = await convertBuffer(inputBuffer, ".pdf");
11992
+ await writeFile2(workingPdfPath, out);
11489
11993
  }
11490
11994
  timingsMs.convert = elapsedMs(convertStart);
11491
11995
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
@@ -11496,10 +12000,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11496
12000
  if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
11497
12001
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11498
12002
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
11499
- await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join4(imagesDir, "page")]);
12003
+ await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
11500
12004
  const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11501
12005
  if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
11502
- const probeImage = join4(imagesDir, firstFiles[0]);
12006
+ const probeImage = join5(imagesDir, firstFiles[0]);
11503
12007
  markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
11504
12008
  const probeStart = performance.now();
11505
12009
  currentStage = "probe";
@@ -11535,7 +12039,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11535
12039
  const keyCount = keyPool.snapshot().length;
11536
12040
  const workerCount = Math.max(1, keyCount * concurrencyPerKey);
11537
12041
  const queueCapacity = workerCount * 2;
11538
- const queue = new BoundedQueue(queueCapacity);
12042
+ const queue2 = new BoundedQueue(queueCapacity);
11539
12043
  const ocrStart = performance.now();
11540
12044
  currentStage = "ocr";
11541
12045
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (\uC6CC\uCEE4 ${workerCount}\uAC1C)`);
@@ -11543,17 +12047,17 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11543
12047
  let renderDone = 1;
11544
12048
  const renderProducer = (async () => {
11545
12049
  try {
11546
- await queue.enqueue({ pageNumber: 1, imagePath: probeImage });
12050
+ await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
11547
12051
  if (totalPages > 1) {
11548
- for await (const item of renderPdfToPngStream(workingPdfPath, join4(imagesDir, "page"), dpi, totalPages, 2)) {
11549
- await queue.enqueue(item);
12052
+ for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
12053
+ await queue2.enqueue(item);
11550
12054
  renderDone++;
11551
12055
  markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
11552
12056
  logStage("debug", "render", "progress", "\uD398\uC774\uC9C0 \uB80C\uB354 \uC644\uB8CC", { page: item.pageNumber });
11553
12057
  }
11554
12058
  }
11555
12059
  } finally {
11556
- queue.close();
12060
+ queue2.close();
11557
12061
  timingsMs.render = elapsedMs(renderStart);
11558
12062
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11559
12063
  logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: renderDone, elapsedMs: timingsMs.render });
@@ -11562,7 +12066,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11562
12066
  const [, pageResultsMap] = await Promise.all([
11563
12067
  renderProducer,
11564
12068
  ocrWorkerPool({
11565
- queue,
12069
+ queue: queue2,
11566
12070
  workerCount,
11567
12071
  totalPages,
11568
12072
  ocrInput: {
@@ -11595,8 +12099,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11595
12099
  const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
11596
12100
  const rawPagePaths = [];
11597
12101
  for (const [pageNum, markdown] of sortedEntries) {
11598
- const pagePath = join4(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
11599
- await writeFile(pagePath, markdown, "utf-8");
12102
+ const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
12103
+ await writeFile2(pagePath, markdown, "utf-8");
11600
12104
  rawPagePaths.push(pagePath);
11601
12105
  }
11602
12106
  const mergeStart = performance.now();
@@ -11604,7 +12108,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11604
12108
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11605
12109
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
11606
12110
  const merged = await mergeMarkdownPages(rawPagePaths);
11607
- await writeFile(outputPath, merged, "utf-8");
12111
+ await writeFile2(outputPath, merged, "utf-8");
11608
12112
  timingsMs.merge = elapsedMs(mergeStart);
11609
12113
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11610
12114
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
@@ -11620,7 +12124,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11620
12124
  timingsMs,
11621
12125
  modelCachePath
11622
12126
  };
11623
- await writeFile(reportPath, JSON.stringify(report, null, 2), "utf-8");
12127
+ await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
11624
12128
  logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
11625
12129
  return { outputPath, reportPath, selectedModel };
11626
12130
  } catch (err) {
@@ -11682,17 +12186,6 @@ function emitProgress(cb, stage, stagePercent, weights, extra) {
11682
12186
  model: extra.model
11683
12187
  });
11684
12188
  }
11685
- async function convertWithLibreOffice(buffer, ext) {
11686
- return await new Promise((resolvePromise, reject) => {
11687
- libreConvert(buffer, ext, void 0, (err, done) => {
11688
- if (err || !done) {
11689
- reject(new UnifiedOcrError("CONVERT_FAILED", "convert", err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"));
11690
- return;
11691
- }
11692
- resolvePromise(done);
11693
- });
11694
- });
11695
- }
11696
12189
  async function getPdfPageCount(pdfPath) {
11697
12190
  const stdout = await runCommandWithStdout("pdfinfo", [pdfPath]);
11698
12191
  const m = stdout.match(/^\s*Pages:\s*(\d+)\s*$/mi);
@@ -11722,7 +12215,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11722
12215
  ]);
11723
12216
  const files = await readdir(imagesDir);
11724
12217
  const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11725
- const imagePath = join4(imagesDir, pageFiles[pageFiles.length - 1]);
12218
+ const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
11726
12219
  yield { pageNumber: page, imagePath };
11727
12220
  } catch (err) {
11728
12221
  yield {
@@ -11735,7 +12228,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11735
12228
  }
11736
12229
  async function runCommand(cmd, args) {
11737
12230
  await new Promise((resolvePromise, reject) => {
11738
- const child = spawn2(cmd, args, { stdio: "pipe" });
12231
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11739
12232
  let stderr = "";
11740
12233
  child.stderr.on("data", (d) => {
11741
12234
  stderr += String(d);
@@ -11749,7 +12242,7 @@ async function runCommand(cmd, args) {
11749
12242
  }
11750
12243
  async function runCommandWithStdout(cmd, args) {
11751
12244
  return await new Promise((resolvePromise, reject) => {
11752
- const child = spawn2(cmd, args, { stdio: "pipe" });
12245
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11753
12246
  let stdout = "";
11754
12247
  let stderr = "";
11755
12248
  child.stdout.on("data", (d) => {
@@ -11765,13 +12258,6 @@ async function runCommandWithStdout(cmd, args) {
11765
12258
  });
11766
12259
  });
11767
12260
  }
11768
- async function assertSofficeAvailable() {
11769
- try {
11770
- await runCommand("soffice", ["--version"]);
11771
- } catch {
11772
- throw new UnifiedOcrError("SOFFICE_NOT_FOUND", "convert", "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.");
11773
- }
11774
- }
11775
12261
  function naturalPageSort(a, b) {
11776
12262
  const na = Number((a.match(/\d+/g) || ["0"]).at(-1) || 0);
11777
12263
  const nb = Number((b.match(/\d+/g) || ["0"]).at(-1) || 0);
@@ -11845,7 +12331,7 @@ function startParallelProbeRuns(input) {
11845
12331
  }
11846
12332
  async function loadModelCache(path) {
11847
12333
  try {
11848
- const raw = await readFile(path, "utf-8");
12334
+ const raw = await readFile2(path, "utf-8");
11849
12335
  return JSON.parse(raw);
11850
12336
  } catch {
11851
12337
  return null;
@@ -11876,15 +12362,15 @@ async function updateModelCache(path, probes) {
11876
12362
  }
11877
12363
  }
11878
12364
  current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
11879
- await writeFile(path, JSON.stringify(current, null, 2), "utf-8");
12365
+ await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
11880
12366
  }
11881
12367
  async function ocrWorkerPool(input) {
11882
- const { queue, workerCount, ocrInput, onPageDone } = input;
12368
+ const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
11883
12369
  const results = /* @__PURE__ */ new Map();
11884
12370
  let completedCount = 0;
11885
12371
  async function worker() {
11886
12372
  while (true) {
11887
- const item = await queue.dequeue();
12373
+ const item = await queue2.dequeue();
11888
12374
  if (item === QUEUE_DONE) break;
11889
12375
  const { pageNumber, imagePath, error } = item;
11890
12376
  if (imagePath === null) {
@@ -11936,7 +12422,7 @@ async function ocrImageWithFallback(input) {
11936
12422
  async function mergeMarkdownPages(paths) {
11937
12423
  const out = [];
11938
12424
  for (let i = 0; i < paths.length; i++) {
11939
- const txt = (await readFile(paths[i], "utf-8")).trim();
12425
+ const txt = (await readFile2(paths[i], "utf-8")).trim();
11940
12426
  if (!txt) continue;
11941
12427
  out.push(txt);
11942
12428
  }
@@ -12052,7 +12538,7 @@ async function ocrImageViaNim(input) {
12052
12538
  throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
12053
12539
  }
12054
12540
  async function encodeBase64(path) {
12055
- const b = await readFile(path);
12541
+ const b = await readFile2(path);
12056
12542
  return b.toString("base64");
12057
12543
  }
12058
12544
  function stripCodeFence3(text) {
@@ -12091,7 +12577,7 @@ async function parse2(input, options) {
12091
12577
  let buffer;
12092
12578
  if (typeof input === "string") {
12093
12579
  try {
12094
- const buf = await readFile2(input);
12580
+ const buf = await readFile3(input);
12095
12581
  buffer = toArrayBuffer(buf);
12096
12582
  } catch (err) {
12097
12583
  const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
@@ -12250,6 +12736,9 @@ export {
12250
12736
  VERSION,
12251
12737
  blocksToMarkdown,
12252
12738
  compare,
12739
+ convertHwpToPdf,
12740
+ convertHwpxToPdf,
12741
+ convertToPdf,
12253
12742
  detectFormat,
12254
12743
  detectZipFormat,
12255
12744
  diffBlocks,