@clazic/kordoc 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -37,118 +37,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
37
37
  mod
38
38
  ));
39
39
 
40
- // src/utils.ts
41
- var utils_exports = {};
42
- __export(utils_exports, {
43
- KordocError: () => KordocError,
44
- VERSION: () => VERSION,
45
- classifyError: () => classifyError,
46
- isPathTraversal: () => isPathTraversal,
47
- normalizeKordocError: () => normalizeKordocError,
48
- precheckZipSize: () => precheckZipSize,
49
- sanitizeError: () => sanitizeError,
50
- sanitizeHref: () => sanitizeHref,
51
- toArrayBuffer: () => toArrayBuffer
52
- });
53
- function toArrayBuffer(buf) {
54
- if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
55
- return buf.buffer;
56
- }
57
- return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
58
- }
59
- function sanitizeError(err) {
60
- if (err instanceof KordocError) return err.message;
61
- return "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
62
- }
63
- function isPathTraversal(name) {
64
- if (name.includes("\0")) return true;
65
- const normalized = name.replace(/\\/g, "/");
66
- return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
67
- }
68
- function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
69
- try {
70
- const data = new DataView(buffer);
71
- const len = buffer.byteLength;
72
- let eocdOffset = -1;
73
- for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
74
- if (data.getUint32(i, true) === 101010256) {
75
- eocdOffset = i;
76
- break;
77
- }
78
- }
79
- if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
80
- const entryCount = data.getUint16(eocdOffset + 10, true);
81
- if (entryCount > maxEntries) {
82
- throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
83
- }
84
- const cdSize = data.getUint32(eocdOffset + 12, true);
85
- const cdOffset = data.getUint32(eocdOffset + 16, true);
86
- if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
87
- let totalUncompressed = 0;
88
- let pos = cdOffset;
89
- for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
90
- if (data.getUint32(pos, true) !== 33639248) break;
91
- totalUncompressed += data.getUint32(pos + 24, true);
92
- const nameLen = data.getUint16(pos + 28, true);
93
- const extraLen = data.getUint16(pos + 30, true);
94
- const commentLen = data.getUint16(pos + 32, true);
95
- pos += 46 + nameLen + extraLen + commentLen;
96
- }
97
- if (totalUncompressed > maxUncompressedSize) {
98
- throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
99
- }
100
- return { totalUncompressed, entryCount };
101
- } catch (err) {
102
- if (err instanceof KordocError) throw err;
103
- return { totalUncompressed: 0, entryCount: 0 };
104
- }
105
- }
106
- function sanitizeHref(href) {
107
- const trimmed = href.trim();
108
- if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
109
- return trimmed;
110
- }
111
- function classifyError(err) {
112
- if (!(err instanceof Error)) return "PARSE_ERROR";
113
- const msg = err.message;
114
- if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
115
- if (msg.includes("DRM")) return "DRM_PROTECTED";
116
- if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
117
- if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
118
- if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
119
- if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
120
- if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
121
- return "PARSE_ERROR";
122
- }
123
- function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
124
- if (err instanceof KordocError) {
125
- if (!err.stage) err.stage = stage;
126
- if (!err.code) err.code = fallbackCode;
127
- return err;
128
- }
129
- const message = err instanceof Error ? err.message : fallbackMessage;
130
- const code = err instanceof Error ? classifyError(err) : fallbackCode;
131
- return new KordocError(message || fallbackMessage, { code, stage });
132
- }
133
- var VERSION, KordocError, SAFE_HREF_RE;
134
- var init_utils = __esm({
135
- "src/utils.ts"() {
136
- "use strict";
137
- VERSION = true ? "2.5.2" : "0.0.0-dev";
138
- KordocError = class extends Error {
139
- code;
140
- stage;
141
- constructor(message, opts = {}) {
142
- super(message);
143
- this.name = "KordocError";
144
- this.code = opts.code;
145
- this.stage = opts.stage;
146
- }
147
- };
148
- SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
149
- }
150
- });
151
-
152
40
  // src/page-range.ts
153
41
  var page_range_exports = {};
154
42
  __export(page_range_exports, {
@@ -3223,8 +3111,97 @@ async function detectZipFormat(buffer) {
3223
3111
  import JSZip2 from "jszip";
3224
3112
  import { DOMParser } from "@xmldom/xmldom";
3225
3113
 
3114
+ // src/utils.ts
3115
+ var VERSION = true ? "2.6.1" : "0.0.0-dev";
3116
+ function toArrayBuffer(buf) {
3117
+ if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3118
+ return buf.buffer;
3119
+ }
3120
+ return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
3121
+ }
3122
+ var KordocError = class extends Error {
3123
+ code;
3124
+ stage;
3125
+ constructor(message, opts = {}) {
3126
+ super(message);
3127
+ this.name = "KordocError";
3128
+ this.code = opts.code;
3129
+ this.stage = opts.stage;
3130
+ }
3131
+ };
3132
+ function isPathTraversal(name) {
3133
+ if (name.includes("\0")) return true;
3134
+ const normalized = name.replace(/\\/g, "/");
3135
+ return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
3136
+ }
3137
+ function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
3138
+ try {
3139
+ const data = new DataView(buffer);
3140
+ const len = buffer.byteLength;
3141
+ let eocdOffset = -1;
3142
+ for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
3143
+ if (data.getUint32(i, true) === 101010256) {
3144
+ eocdOffset = i;
3145
+ break;
3146
+ }
3147
+ }
3148
+ if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
3149
+ const entryCount = data.getUint16(eocdOffset + 10, true);
3150
+ if (entryCount > maxEntries) {
3151
+ throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
3152
+ }
3153
+ const cdSize = data.getUint32(eocdOffset + 12, true);
3154
+ const cdOffset = data.getUint32(eocdOffset + 16, true);
3155
+ if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
3156
+ let totalUncompressed = 0;
3157
+ let pos = cdOffset;
3158
+ for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
3159
+ if (data.getUint32(pos, true) !== 33639248) break;
3160
+ totalUncompressed += data.getUint32(pos + 24, true);
3161
+ const nameLen = data.getUint16(pos + 28, true);
3162
+ const extraLen = data.getUint16(pos + 30, true);
3163
+ const commentLen = data.getUint16(pos + 32, true);
3164
+ pos += 46 + nameLen + extraLen + commentLen;
3165
+ }
3166
+ if (totalUncompressed > maxUncompressedSize) {
3167
+ throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
3168
+ }
3169
+ return { totalUncompressed, entryCount };
3170
+ } catch (err) {
3171
+ if (err instanceof KordocError) throw err;
3172
+ return { totalUncompressed: 0, entryCount: 0 };
3173
+ }
3174
+ }
3175
+ var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
3176
+ function sanitizeHref(href) {
3177
+ const trimmed = href.trim();
3178
+ if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
3179
+ return trimmed;
3180
+ }
3181
+ function classifyError(err) {
3182
+ if (!(err instanceof Error)) return "PARSE_ERROR";
3183
+ const msg = err.message;
3184
+ if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
3185
+ if (msg.includes("DRM")) return "DRM_PROTECTED";
3186
+ if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
3187
+ if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
3188
+ if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
3189
+ if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
3190
+ if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
3191
+ return "PARSE_ERROR";
3192
+ }
3193
+ function normalizeKordocError(err, fallbackMessage, stage = "unknown", fallbackCode = "PARSE_ERROR") {
3194
+ if (err instanceof KordocError) {
3195
+ if (!err.stage) err.stage = stage;
3196
+ if (!err.code) err.code = fallbackCode;
3197
+ return err;
3198
+ }
3199
+ const message = err instanceof Error ? err.message : fallbackMessage;
3200
+ const code = err instanceof Error ? classifyError(err) : fallbackCode;
3201
+ return new KordocError(message || fallbackMessage, { code, stage });
3202
+ }
3203
+
3226
3204
  // src/table/builder.ts
3227
- init_utils();
3228
3205
  var MAX_COLS = 200;
3229
3206
  var MAX_ROWS = 1e4;
3230
3207
  function buildTable(rows) {
@@ -3484,8 +3461,6 @@ var HEADING_RATIO_H2 = 1.3;
3484
3461
  var HEADING_RATIO_H3 = 1.15;
3485
3462
 
3486
3463
  // src/hwpx/parser.ts
3487
- init_utils();
3488
- init_utils();
3489
3464
  init_page_range();
3490
3465
  init_logger();
3491
3466
  var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
@@ -4327,7 +4302,6 @@ function extractTextFromNode(node) {
4327
4302
  }
4328
4303
 
4329
4304
  // src/hwp5/record.ts
4330
- init_utils();
4331
4305
  import { inflateRawSync, inflateSync } from "zlib";
4332
4306
  var TAG_PARA_HEADER = 66;
4333
4307
  var TAG_PARA_TEXT = 67;
@@ -5378,7 +5352,6 @@ function parseLenientCfb(data) {
5378
5352
  }
5379
5353
 
5380
5354
  // src/hwp5/parser.ts
5381
- init_utils();
5382
5355
  init_page_range();
5383
5356
  init_logger();
5384
5357
  var CFB = __toESM(require_cfb(), 1);
@@ -6034,7 +6007,6 @@ function arrangeCells(rows, cols, cells) {
6034
6007
  }
6035
6008
 
6036
6009
  // src/pdf/parser.ts
6037
- init_utils();
6038
6010
  init_page_range();
6039
6011
  import { createRequire } from "module";
6040
6012
  import { dirname as dirname2, join as join3, resolve as resolve2 } from "path";
@@ -7926,7 +7898,6 @@ function mergeKoreanLines(text) {
7926
7898
  }
7927
7899
 
7928
7900
  // src/xlsx/parser.ts
7929
- init_utils();
7930
7901
  import JSZip3 from "jszip";
7931
7902
  import { DOMParser as DOMParser2 } from "@xmldom/xmldom";
7932
7903
  init_logger();
@@ -8255,7 +8226,6 @@ async function parseXlsxDocument(buffer, options, existingZip) {
8255
8226
  }
8256
8227
 
8257
8228
  // src/docx/parser.ts
8258
- init_utils();
8259
8229
  import JSZip4 from "jszip";
8260
8230
  import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
8261
8231
  init_logger();
@@ -8737,7 +8707,6 @@ async function parseDocxDocument(buffer, options, existingZip) {
8737
8707
  }
8738
8708
 
8739
8709
  // src/index.ts
8740
- init_utils();
8741
8710
  init_cli_provider();
8742
8711
  init_markdown_to_blocks();
8743
8712
  init_logger();
@@ -11241,7 +11210,6 @@ async function markdownToXlsx(markdown, options) {
11241
11210
 
11242
11211
  // src/convert/index.ts
11243
11212
  import { readFile } from "fs/promises";
11244
- init_utils();
11245
11213
 
11246
11214
  // src/convert/libreoffice.ts
11247
11215
  import libre from "libreoffice-convert";
@@ -11255,19 +11223,250 @@ var ConvertError = class extends Error {
11255
11223
  }
11256
11224
  };
11257
11225
 
11258
- // src/convert/libreoffice.ts
11259
- var libreConvert = libre.convert;
11260
- async function assertSofficeAvailable() {
11261
- const { runCommand: runCommand2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
11226
+ // src/convert/installer.ts
11227
+ import { homedir } from "os";
11228
+ import { join as join4, delimiter } from "path";
11229
+ import { mkdir, access, symlink, rm } from "fs/promises";
11230
+ import { createWriteStream } from "fs";
11231
+ import { spawn as spawn2 } from "child_process";
11232
+ var installInFlight = null;
11233
+ var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");
11234
+ var VERSION_FILE = join4(CACHE_DIR, "version");
11235
+ var PACKAGES = {
11236
+ darwin: {
11237
+ url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/mac/x86_64/LibreOffice_24.8.4_MacOS_x86-64.dmg",
11238
+ binPath: "LibreOffice.app/Contents/MacOS/soffice",
11239
+ sizeMb: 300
11240
+ },
11241
+ linux: {
11242
+ url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/deb/x86_64/LibreOffice_24.8.4_Linux_x86-64_deb.tar.gz",
11243
+ binPath: "opt/libreoffice24.8/program/soffice",
11244
+ sizeMb: 200
11245
+ },
11246
+ win32: {
11247
+ url: "https://download.documentfoundation.org/libreoffice/stable/24.8.4/win/x86_64/LibreOffice_24.8.4_Win_x86-64.msi",
11248
+ binPath: "LibreOffice/program/soffice.exe",
11249
+ sizeMb: 350
11250
+ }
11251
+ };
11252
+ async function findInPath() {
11253
+ return new Promise((resolve4) => {
11254
+ const child = spawn2("soffice", ["--version"], { stdio: "ignore" });
11255
+ child.on("close", (code) => resolve4(code === 0 ? "soffice" : null));
11256
+ child.on("error", () => resolve4(null));
11257
+ });
11258
+ }
11259
+ async function findInCache() {
11260
+ const cachedBin = join4(CACHE_DIR, "bin", "soffice");
11261
+ try {
11262
+ await access(cachedBin);
11263
+ return cachedBin;
11264
+ } catch {
11265
+ return null;
11266
+ }
11267
+ }
11268
+ async function findInDefaultPaths() {
11269
+ const platform = process.platform;
11270
+ const paths = [];
11271
+ if (platform === "darwin") {
11272
+ paths.push(
11273
+ "/Applications/LibreOffice.app/Contents/MacOS/soffice",
11274
+ "/opt/homebrew/bin/soffice",
11275
+ "/usr/local/bin/soffice"
11276
+ );
11277
+ } else if (platform === "linux") {
11278
+ paths.push(
11279
+ "/usr/bin/soffice",
11280
+ "/usr/lib/libreoffice/program/soffice"
11281
+ );
11282
+ } else if (platform === "win32") {
11283
+ const pf = process.env["ProgramFiles"] ?? "C:\\Program Files";
11284
+ const pf86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
11285
+ paths.push(
11286
+ join4(pf, "LibreOffice", "program", "soffice.exe"),
11287
+ join4(pf86, "LibreOffice", "program", "soffice.exe")
11288
+ );
11289
+ }
11290
+ for (const p of paths) {
11291
+ try {
11292
+ await access(p);
11293
+ return p;
11294
+ } catch {
11295
+ continue;
11296
+ }
11297
+ }
11298
+ return null;
11299
+ }
11300
+ async function downloadWithProgress(url, dest, totalBytes, onProgress) {
11301
+ const response = await fetch(url);
11302
+ if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
11303
+ const file = createWriteStream(dest);
11304
+ const reader = response.body.getReader();
11305
+ let downloaded = 0;
11306
+ try {
11307
+ while (true) {
11308
+ const { done, value } = await reader.read();
11309
+ if (done) break;
11310
+ file.write(value);
11311
+ downloaded += value.length;
11312
+ onProgress?.(downloaded, totalBytes);
11313
+ }
11314
+ } finally {
11315
+ file.end();
11316
+ reader.releaseLock();
11317
+ }
11318
+ }
11319
+ async function installForPlatform(pkg, onProgress) {
11320
+ const platform = process.platform;
11321
+ await mkdir(CACHE_DIR, { recursive: true });
11322
+ const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
11323
+ await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
11324
+ try {
11325
+ if (platform === "darwin") {
11326
+ return await installMacOS(pkg, downloadPath);
11327
+ } else if (platform === "linux") {
11328
+ return await installLinux(pkg, downloadPath);
11329
+ } else if (platform === "win32") {
11330
+ return await installWindows(pkg, downloadPath);
11331
+ }
11332
+ } catch (err) {
11333
+ await rm(downloadPath, { force: true });
11334
+ throw err;
11335
+ }
11336
+ throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
11337
+ }
11338
+ async function installMacOS(pkg, downloadPath) {
11339
+ const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
11340
+ await new Promise((resolve4, reject) => {
11341
+ const child = spawn2("hdiutil", ["attach", "-nobrowse", "-mountpoint", mountPoint, downloadPath]);
11342
+ child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328")));
11343
+ });
11344
+ try {
11345
+ const appSource = join4(mountPoint, "LibreOffice.app");
11346
+ const appDest = join4(CACHE_DIR, "LibreOffice.app");
11347
+ await new Promise((resolve4, reject) => {
11348
+ const child = spawn2("cp", ["-R", appSource, appDest]);
11349
+ child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(".app \uBCF5\uC0AC \uC2E4\uD328")));
11350
+ });
11351
+ } finally {
11352
+ await new Promise((resolve4) => {
11353
+ const child = spawn2("hdiutil", ["detach", mountPoint]);
11354
+ child.on("close", () => resolve4());
11355
+ });
11356
+ }
11357
+ await rm(downloadPath, { force: true });
11358
+ return await createSymlink(join4(CACHE_DIR, pkg.binPath));
11359
+ }
11360
+ async function installLinux(pkg, downloadPath) {
11361
+ const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
11362
+ await mkdir(extractDir, { recursive: true });
11363
+ await new Promise((resolve4, reject) => {
11364
+ const child = spawn2("tar", ["xzf", downloadPath, "-C", extractDir]);
11365
+ child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328")));
11366
+ });
11367
+ const debsDir = join4(extractDir, "DEBS");
11262
11368
  try {
11263
- await runCommand2("soffice", ["--version"]);
11369
+ await access(debsDir);
11370
+ const entries = await (await import("fs/promises")).readdir(debsDir);
11371
+ for (const entry of entries) {
11372
+ if (entry.endsWith(".deb")) {
11373
+ await new Promise((resolve4, reject) => {
11374
+ const child = spawn2("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR]);
11375
+ child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`${entry} \uCD94\uCD9C \uC2E4\uD328`)));
11376
+ });
11377
+ }
11378
+ }
11264
11379
  } catch {
11380
+ }
11381
+ await rm(downloadPath, { force: true });
11382
+ await rm(extractDir, { recursive: true, force: true });
11383
+ return await createSymlink(join4(CACHE_DIR, pkg.binPath));
11384
+ }
11385
+ async function installWindows(pkg, downloadPath) {
11386
+ await new Promise((resolve4, reject) => {
11387
+ const child = spawn2("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`]);
11388
+ child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
11389
+ });
11390
+ await rm(downloadPath, { force: true });
11391
+ return join4(CACHE_DIR, pkg.binPath);
11392
+ }
11393
+ async function createSymlink(actualBin) {
11394
+ const binDir = join4(CACHE_DIR, "bin");
11395
+ await mkdir(binDir, { recursive: true });
11396
+ const linkBin = join4(binDir, "soffice");
11397
+ try {
11398
+ await symlink(actualBin, linkBin);
11399
+ } catch {
11400
+ }
11401
+ process.env.PATH = `${binDir}${delimiter}${process.env.PATH}`;
11402
+ return linkBin;
11403
+ }
11404
+ async function installLibreOffice(onProgress) {
11405
+ const platform = process.platform;
11406
+ const pkg = PACKAGES[platform];
11407
+ if (!pkg) {
11265
11408
  throw new ConvertError(
11409
+ "UNSUPPORTED_PLATFORM",
11410
+ `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
11411
+ );
11412
+ }
11413
+ return await installForPlatform(pkg, onProgress);
11414
+ }
11415
+ async function resolveSoffice(emitter, autoInstall = true) {
11416
+ emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11417
+ const inPath = await findInPath();
11418
+ if (inPath) {
11419
+ emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
11420
+ return inPath;
11421
+ }
11422
+ const inCache = await findInCache();
11423
+ if (inCache) {
11424
+ emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
11425
+ return inCache;
11426
+ }
11427
+ const inDefault = await findInDefaultPaths();
11428
+ if (inDefault) {
11429
+ emitter.validate("soffice_found", "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inDefault });
11430
+ return inDefault;
11431
+ }
11432
+ if (!autoInstall) {
11433
+ emitter.error(
11434
+ "validate",
11266
11435
  "SOFFICE_NOT_FOUND",
11267
- "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
11436
+ "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
11437
+ "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
11268
11438
  );
11439
+ throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
11440
+ }
11441
+ if (installInFlight) {
11442
+ return installInFlight;
11269
11443
  }
11444
+ emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
11445
+ installInFlight = (async () => {
11446
+ try {
11447
+ const installed = await installLibreOffice((downloaded, total) => {
11448
+ const percent = Math.round(downloaded / total * 100);
11449
+ emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
11450
+ percent,
11451
+ downloadedBytes: downloaded,
11452
+ totalBytes: total
11453
+ });
11454
+ });
11455
+ emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
11456
+ return installed;
11457
+ } catch (err) {
11458
+ const errorMsg = err instanceof Error ? err.message : String(err);
11459
+ emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
11460
+ throw err;
11461
+ } finally {
11462
+ installInFlight = null;
11463
+ }
11464
+ })();
11465
+ return installInFlight;
11270
11466
  }
11467
+
11468
+ // src/convert/libreoffice.ts
11469
+ var libreConvert = libre.convert;
11271
11470
  async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
11272
11471
  return new Promise((resolve4, reject) => {
11273
11472
  const timer = setTimeout(() => {
@@ -11291,6 +11490,54 @@ async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
11291
11490
  });
11292
11491
  }
11293
11492
 
11493
+ // src/convert/events.ts
11494
+ var ConvertEventEmitter = class {
11495
+ listener = null;
11496
+ /** 이벤트 리스너 등록 */
11497
+ setListener(listener) {
11498
+ this.listener = listener;
11499
+ }
11500
+ /** 이벤트 발송 */
11501
+ emit(event) {
11502
+ try {
11503
+ this.listener?.(event);
11504
+ } catch {
11505
+ }
11506
+ }
11507
+ /** 타입 안전한 헬퍼: detect 이벤트 */
11508
+ detect(stage, message, meta) {
11509
+ this.emit({ type: "detect", stage, message, ...meta });
11510
+ }
11511
+ /** 타입 안전한 헬퍼: validate 이벤트 */
11512
+ validate(stage, message, meta) {
11513
+ this.emit({ type: "validate", stage, message, ...meta });
11514
+ }
11515
+ /** 타입 안전한 헬퍼: install 이벤트 */
11516
+ install(stage, message, meta) {
11517
+ this.emit({ type: "install", stage, message, ...meta });
11518
+ }
11519
+ /** 타입 안전한 헬퍼: convert 진행 이벤트 */
11520
+ progress(percent, message) {
11521
+ this.emit({ type: "convert", stage: "convert_progress", message, percent });
11522
+ }
11523
+ /** 타입 안전한 헬퍼: convert 시작 */
11524
+ convertStart(message) {
11525
+ this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
11526
+ }
11527
+ /** 타입 안전한 헬퍼: convert 완료 */
11528
+ convertDone(message) {
11529
+ this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
11530
+ }
11531
+ /** 타입 안전한 헬퍼: 완료 이벤트 */
11532
+ complete(result) {
11533
+ this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
11534
+ }
11535
+ /** 타입 안전한 헬퍼: 에러 이벤트 */
11536
+ error(stage, code, message, suggestion) {
11537
+ this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
11538
+ }
11539
+ };
11540
+
11294
11541
  // src/convert/index.ts
11295
11542
  var isConverting = false;
11296
11543
  var queue = [];
@@ -11315,81 +11562,129 @@ async function acquireConvertLock() {
11315
11562
  });
11316
11563
  }
11317
11564
  async function convertToPdf(input, options) {
11318
- let buffer;
11319
- try {
11320
- if (typeof input === "string") {
11321
- buffer = await readFile(input);
11322
- } else if (Buffer.isBuffer(input)) {
11323
- buffer = input;
11324
- } else {
11325
- buffer = Buffer.from(input);
11326
- }
11327
- } catch (err) {
11328
- return {
11329
- success: false,
11330
- code: "PARSE_ERROR",
11331
- error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11332
- stage: "detect"
11333
- };
11565
+ const emitter = new ConvertEventEmitter();
11566
+ if (options?.onEvent) {
11567
+ emitter.setListener(options.onEvent);
11334
11568
  }
11335
- const MAX_FILE_SIZE = 500 * 1024 * 1024;
11336
- if (buffer.length > MAX_FILE_SIZE) {
11337
- return {
11338
- success: false,
11339
- code: "FILE_TOO_LARGE",
11340
- error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11341
- stage: "detect"
11342
- };
11343
- }
11344
- const format = detectFormat(toArrayBuffer(buffer));
11345
- if (format !== "hwp" && format !== "hwpx") {
11346
- return {
11347
- success: false,
11348
- code: "UNSUPPORTED_FORMAT",
11349
- error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11350
- stage: "detect"
11351
- };
11569
+ if (options?.onProgress) {
11570
+ const legacyProgress = options.onProgress;
11571
+ emitter.setListener((event) => {
11572
+ if (event.type === "convert" && event.stage === "convert_progress") {
11573
+ legacyProgress(event.percent, event.message);
11574
+ }
11575
+ });
11352
11576
  }
11353
11577
  try {
11354
- await assertSofficeAvailable();
11355
- } catch (err) {
11356
- if (err instanceof ConvertError) {
11578
+ emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
11579
+ let buffer;
11580
+ try {
11581
+ if (typeof input === "string") {
11582
+ buffer = await readFile(input);
11583
+ } else if (Buffer.isBuffer(input)) {
11584
+ buffer = input;
11585
+ } else {
11586
+ buffer = Buffer.from(input);
11587
+ }
11588
+ } catch (err) {
11589
+ emitter.error(
11590
+ "detect",
11591
+ "PARSE_ERROR",
11592
+ `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
11593
+ );
11357
11594
  return {
11358
11595
  success: false,
11359
- code: err.code,
11360
- error: err.message,
11361
- stage: "validate"
11596
+ code: "PARSE_ERROR",
11597
+ error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11598
+ stage: "detect"
11362
11599
  };
11363
11600
  }
11364
- throw err;
11365
- }
11366
- const releaseLock = await acquireConvertLock();
11367
- try {
11368
- options?.onProgress?.(10, "convert");
11369
- const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11370
- options?.onProgress?.(100, "done");
11371
- return {
11372
- success: true,
11373
- pdf: new Uint8Array(pdf),
11374
- sourceFormat: format
11375
- };
11376
- } catch (err) {
11377
- if (err instanceof ConvertError) {
11601
+ const MAX_FILE_SIZE = 500 * 1024 * 1024;
11602
+ if (buffer.length > MAX_FILE_SIZE) {
11603
+ emitter.error(
11604
+ "detect",
11605
+ "FILE_TOO_LARGE",
11606
+ `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
11607
+ );
11378
11608
  return {
11379
11609
  success: false,
11380
- code: err.code,
11381
- error: err.message,
11610
+ code: "FILE_TOO_LARGE",
11611
+ error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11612
+ stage: "detect"
11613
+ };
11614
+ }
11615
+ const format = detectFormat(toArrayBuffer(buffer));
11616
+ emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
11617
+ if (format !== "hwp" && format !== "hwpx") {
11618
+ emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
11619
+ return {
11620
+ success: false,
11621
+ code: "UNSUPPORTED_FORMAT",
11622
+ error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11623
+ stage: "detect"
11624
+ };
11625
+ }
11626
+ emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11627
+ let sofficePath;
11628
+ try {
11629
+ sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
11630
+ } catch (err) {
11631
+ if (err instanceof ConvertError) {
11632
+ return {
11633
+ success: false,
11634
+ code: err.code,
11635
+ error: err.message,
11636
+ stage: "validate"
11637
+ };
11638
+ }
11639
+ throw err;
11640
+ }
11641
+ const releaseLock = await acquireConvertLock();
11642
+ try {
11643
+ emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
11644
+ emitter.progress(10, "\uBCC0\uD658 \uC911...");
11645
+ const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
11646
+ emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
11647
+ emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
11648
+ const result = {
11649
+ success: true,
11650
+ pdf: new Uint8Array(pdf),
11651
+ sourceFormat: format
11652
+ };
11653
+ emitter.complete({
11654
+ sourceFormat: format,
11655
+ pdfSize: pdf.length
11656
+ });
11657
+ return result;
11658
+ } catch (err) {
11659
+ if (err instanceof ConvertError) {
11660
+ emitter.error("convert", err.code, err.message);
11661
+ return {
11662
+ success: false,
11663
+ code: err.code,
11664
+ error: err.message,
11665
+ stage: "convert"
11666
+ };
11667
+ }
11668
+ const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
11669
+ emitter.error("convert", classifyError(err), errorMsg);
11670
+ return {
11671
+ success: false,
11672
+ code: classifyError(err),
11673
+ error: errorMsg,
11382
11674
  stage: "convert"
11383
11675
  };
11676
+ } finally {
11677
+ releaseLock();
11384
11678
  }
11679
+ } catch (unexpectedErr) {
11680
+ const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
11681
+ emitter.error("convert", "PARSE_ERROR", errorMsg);
11385
11682
  return {
11386
11683
  success: false,
11387
- code: classifyError(err),
11388
- error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
11684
+ code: "PARSE_ERROR",
11685
+ error: errorMsg,
11389
11686
  stage: "convert"
11390
11687
  };
11391
- } finally {
11392
- releaseLock();
11393
11688
  }
11394
11689
  }
11395
11690
  async function convertHwpToPdf(input, options) {
@@ -11417,9 +11712,6 @@ async function convertHwpxToPdf(input, options) {
11417
11712
  return result;
11418
11713
  }
11419
11714
 
11420
- // src/index.ts
11421
- init_utils();
11422
-
11423
11715
  // src/ocr/api-key-rotation.ts
11424
11716
  var AllKeysCoolingDownError = class extends Error {
11425
11717
  waitMs;
@@ -11514,9 +11806,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11514
11806
  };
11515
11807
 
11516
11808
  // src/pipeline/unified-ocr.ts
11517
- import { mkdir, readdir, readFile as readFile2, stat, writeFile } from "fs/promises";
11518
- import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11519
- import { spawn as spawn2 } from "child_process";
11809
+ import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
11810
+ import { basename as basename2, delimiter as delimiter2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
11811
+ import { spawn as spawn3 } from "child_process";
11520
11812
  import { performance } from "perf_hooks";
11521
11813
  init_logger();
11522
11814
 
@@ -11652,13 +11944,13 @@ function elapsedMs(startAt) {
11652
11944
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11653
11945
  const absInput = resolve3(inputPath);
11654
11946
  const stem = basename2(absInput, extname(absInput));
11655
- const workspaceDir = resolve3(options.workspaceDir ?? join4(dirname3(absInput), `${stem}_ocr_workspace`));
11656
- const imagesDir = join4(workspaceDir, "images");
11657
- const rawDir = join4(workspaceDir, "ocr", "raw");
11658
- const diffDir = join4(workspaceDir, "ocr", "diff");
11659
- const outputPath = resolve3(options.outputPath ?? join4(dirname3(absInput), `${stem}.md`));
11660
- const reportPath = join4(workspaceDir, "run-report.json");
11661
- const modelCachePath = join4(dirname3(absInput), ".kordoc-model-cache.json");
11947
+ const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
11948
+ const imagesDir = join5(workspaceDir, "images");
11949
+ const rawDir = join5(workspaceDir, "ocr", "raw");
11950
+ const diffDir = join5(workspaceDir, "ocr", "diff");
11951
+ const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
11952
+ const reportPath = join5(workspaceDir, "run-report.json");
11953
+ const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
11662
11954
  const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
11663
11955
  const timeoutMs = options.timeoutMs ?? 6e4;
11664
11956
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
@@ -11672,9 +11964,9 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11672
11964
  const keyPool = ApiKeyRotationPool.fromEnv();
11673
11965
  const runId = options.runId ?? generateRunId("ocr");
11674
11966
  const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
11675
- await mkdir(imagesDir, { recursive: true });
11676
- await mkdir(rawDir, { recursive: true });
11677
- await mkdir(diffDir, { recursive: true });
11967
+ await mkdir2(imagesDir, { recursive: true });
11968
+ await mkdir2(rawDir, { recursive: true });
11969
+ await mkdir2(diffDir, { recursive: true });
11678
11970
  const timingsMs = {};
11679
11971
  const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
11680
11972
  const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
@@ -11691,11 +11983,29 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11691
11983
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11692
11984
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
11693
11985
  if (extname(absInput).toLowerCase() !== ".pdf") {
11694
- await assertSofficeAvailable();
11695
- workingPdfPath = join4(workspaceDir, `${stem}.pdf`);
11986
+ const convertEmitter = new ConvertEventEmitter();
11987
+ if (options.onEvent) {
11988
+ convertEmitter.setListener((evt) => {
11989
+ if (evt.type === "install" || evt.type === "validate" || evt.type === "error") {
11990
+ try {
11991
+ ;
11992
+ options.onEvent(evt);
11993
+ } catch {
11994
+ }
11995
+ }
11996
+ });
11997
+ }
11998
+ if (options.sofficePath) {
11999
+ const sofficeDir = dirname3(options.sofficePath);
12000
+ process.env.PATH = `${sofficeDir}${delimiter2}${process.env.PATH ?? ""}`;
12001
+ convertEmitter.validate("soffice_found", "\uC9C1\uC811 \uC9C0\uC815\uB41C LibreOffice \uACBD\uB85C \uC0AC\uC6A9", { sofficePath: options.sofficePath });
12002
+ } else {
12003
+ await resolveSoffice(convertEmitter, options.autoInstallLibreOffice ?? false);
12004
+ }
12005
+ workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
11696
12006
  const inputBuffer = await readFile2(absInput);
11697
12007
  const out = await convertBuffer(inputBuffer, ".pdf");
11698
- await writeFile(workingPdfPath, out);
12008
+ await writeFile2(workingPdfPath, out);
11699
12009
  }
11700
12010
  timingsMs.convert = elapsedMs(convertStart);
11701
12011
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
@@ -11706,10 +12016,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11706
12016
  if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
11707
12017
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11708
12018
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
11709
- await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join4(imagesDir, "page")]);
12019
+ await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
11710
12020
  const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11711
12021
  if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
11712
- const probeImage = join4(imagesDir, firstFiles[0]);
12022
+ const probeImage = join5(imagesDir, firstFiles[0]);
11713
12023
  markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
11714
12024
  const probeStart = performance.now();
11715
12025
  currentStage = "probe";
@@ -11755,7 +12065,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11755
12065
  try {
11756
12066
  await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
11757
12067
  if (totalPages > 1) {
11758
- for await (const item of renderPdfToPngStream(workingPdfPath, join4(imagesDir, "page"), dpi, totalPages, 2)) {
12068
+ for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
11759
12069
  await queue2.enqueue(item);
11760
12070
  renderDone++;
11761
12071
  markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
@@ -11805,8 +12115,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11805
12115
  const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
11806
12116
  const rawPagePaths = [];
11807
12117
  for (const [pageNum, markdown] of sortedEntries) {
11808
- const pagePath = join4(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
11809
- await writeFile(pagePath, markdown, "utf-8");
12118
+ const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
12119
+ await writeFile2(pagePath, markdown, "utf-8");
11810
12120
  rawPagePaths.push(pagePath);
11811
12121
  }
11812
12122
  const mergeStart = performance.now();
@@ -11814,7 +12124,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11814
12124
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11815
12125
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
11816
12126
  const merged = await mergeMarkdownPages(rawPagePaths);
11817
- await writeFile(outputPath, merged, "utf-8");
12127
+ await writeFile2(outputPath, merged, "utf-8");
11818
12128
  timingsMs.merge = elapsedMs(mergeStart);
11819
12129
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11820
12130
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
@@ -11830,7 +12140,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11830
12140
  timingsMs,
11831
12141
  modelCachePath
11832
12142
  };
11833
- await writeFile(reportPath, JSON.stringify(report, null, 2), "utf-8");
12143
+ await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
11834
12144
  logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
11835
12145
  return { outputPath, reportPath, selectedModel };
11836
12146
  } catch (err) {
@@ -11921,7 +12231,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11921
12231
  ]);
11922
12232
  const files = await readdir(imagesDir);
11923
12233
  const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
11924
- const imagePath = join4(imagesDir, pageFiles[pageFiles.length - 1]);
12234
+ const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
11925
12235
  yield { pageNumber: page, imagePath };
11926
12236
  } catch (err) {
11927
12237
  yield {
@@ -11934,7 +12244,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
11934
12244
  }
11935
12245
  async function runCommand(cmd, args) {
11936
12246
  await new Promise((resolvePromise, reject) => {
11937
- const child = spawn2(cmd, args, { stdio: "pipe" });
12247
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11938
12248
  let stderr = "";
11939
12249
  child.stderr.on("data", (d) => {
11940
12250
  stderr += String(d);
@@ -11948,7 +12258,7 @@ async function runCommand(cmd, args) {
11948
12258
  }
11949
12259
  async function runCommandWithStdout(cmd, args) {
11950
12260
  return await new Promise((resolvePromise, reject) => {
11951
- const child = spawn2(cmd, args, { stdio: "pipe" });
12261
+ const child = spawn3(cmd, args, { stdio: "pipe" });
11952
12262
  let stdout = "";
11953
12263
  let stderr = "";
11954
12264
  child.stdout.on("data", (d) => {
@@ -12068,7 +12378,7 @@ async function updateModelCache(path, probes) {
12068
12378
  }
12069
12379
  }
12070
12380
  current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
12071
- await writeFile(path, JSON.stringify(current, null, 2), "utf-8");
12381
+ await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
12072
12382
  }
12073
12383
  async function ocrWorkerPool(input) {
12074
12384
  const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
@@ -12264,6 +12574,16 @@ function ensureSupportedInput(path) {
12264
12574
  }
12265
12575
  function normalizePipelineError(err, stage) {
12266
12576
  if (err instanceof UnifiedOcrError) return err;
12577
+ if (err instanceof ConvertError) {
12578
+ const codeMap = {
12579
+ SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
12580
+ CONVERT_FAILED: "CONVERT_FAILED",
12581
+ TIMEOUT: "CONVERT_FAILED",
12582
+ UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
12583
+ UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
12584
+ };
12585
+ return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
12586
+ }
12267
12587
  const message = err instanceof Error ? err.message : String(err);
12268
12588
  const codeByStage = {
12269
12589
  convert: "CONVERT_FAILED",