@clazic/kordoc 2.7.5 → 2.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -29
- package/dist/{chunk-6DUCYZRR.js → chunk-URSQEMVJ.js} +345 -523
- package/dist/chunk-URSQEMVJ.js.map +1 -0
- package/dist/{chunk-5CIZV5C3.js → chunk-X7UUXEMM.js} +2 -2
- package/dist/cli.js +5 -87
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +447 -634
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -135
- package/dist/index.d.ts +4 -135
- package/dist/index.js +440 -624
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -43
- package/dist/mcp.js.map +1 -1
- package/dist/{utils-NR7YWMWB.js → utils-QQVZGOGU.js} +2 -2
- package/dist/{watch-LDX5GPEE.js → watch-RQYUNSSH.js} +3 -3
- package/package.json +1 -2
- package/dist/chunk-6DUCYZRR.js.map +0 -1
- /package/dist/{chunk-5CIZV5C3.js.map → chunk-X7UUXEMM.js.map} +0 -0
- /package/dist/{utils-NR7YWMWB.js.map → utils-QQVZGOGU.js.map} +0 -0
- /package/dist/{watch-LDX5GPEE.js.map → watch-RQYUNSSH.js.map} +0 -0
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
precheckZipSize,
|
|
8
8
|
sanitizeHref,
|
|
9
9
|
toArrayBuffer
|
|
10
|
-
} from "./chunk-5CIZV5C3.js";
|
|
10
|
+
} from "./chunk-X7UUXEMM.js";
|
|
11
11
|
import {
|
|
12
12
|
parsePageRange
|
|
13
13
|
} from "./chunk-MOL7MDBG.js";
|
|
@@ -2073,13 +2073,21 @@ function sanitizeText(text) {
|
|
|
2073
2073
|
}
|
|
2074
2074
|
return result;
|
|
2075
2075
|
}
|
|
2076
|
+
/**
 * Escape characters that GitHub-Flavored Markdown would read as markup.
 *
 * `~` is always escaped; `|` is escaped only when `inTableCell` is true.
 * A character is treated as already escaped only when it is preceded by an
 * ODD number of backslashes. With an even count (e.g. `\\~`) the backslashes
 * escape each other and the character itself is still bare, so it receives
 * its own backslash.
 *
 * @param {string} text - Text to escape; falsy values are returned unchanged.
 * @param {boolean} [inTableCell=false] - Also escape `|`.
 * @returns {string} The escaped text.
 */
function escapeGfm(text, inTableCell = false) {
  if (!text) return text;
  // `pattern` captures the run of backslashes immediately before the target char.
  const escapeChar = (input, pattern, char) =>
    input.replace(pattern, (match, slashes) =>
      slashes.length % 2 === 0 ? `${slashes}\\${char}` : match
    );
  let result = escapeChar(text, /(\\*)~/g, "~");
  if (inTableCell) {
    result = escapeChar(result, /(\\*)\|/g, "|");
  }
  return result;
}
|
|
2076
2084
|
function blocksToMarkdown(blocks) {
|
|
2077
2085
|
const lines = [];
|
|
2078
2086
|
for (let i = 0; i < blocks.length; i++) {
|
|
2079
2087
|
const block = blocks[i];
|
|
2080
2088
|
if (block.type === "heading" && block.text) {
|
|
2081
2089
|
const prefix = "#".repeat(Math.min(block.level || 2, 6));
|
|
2082
|
-
const headingText = sanitizeText(block.text);
|
|
2090
|
+
const headingText = escapeGfm(sanitizeText(block.text), false);
|
|
2083
2091
|
if (headingText) lines.push("", `${prefix} ${headingText}`, "");
|
|
2084
2092
|
continue;
|
|
2085
2093
|
}
|
|
@@ -2092,42 +2100,47 @@ function blocksToMarkdown(blocks) {
|
|
|
2092
2100
|
continue;
|
|
2093
2101
|
}
|
|
2094
2102
|
if (block.type === "list" && block.text) {
|
|
2095
|
-
const
|
|
2096
|
-
if (!
|
|
2097
|
-
const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(
|
|
2103
|
+
const sanitized = sanitizeText(block.text);
|
|
2104
|
+
if (!sanitized) continue;
|
|
2105
|
+
const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(sanitized);
|
|
2098
2106
|
const prefix = alreadyNumbered ? "" : block.listType === "ordered" ? "1. " : "- ";
|
|
2107
|
+
const listText = escapeGfm(sanitized, false);
|
|
2099
2108
|
lines.push(`${prefix}${listText}`);
|
|
2100
2109
|
if (block.children) {
|
|
2101
2110
|
for (const child of block.children) {
|
|
2102
2111
|
const childPrefix = child.listType === "ordered" ? "1." : "-";
|
|
2103
|
-
|
|
2112
|
+
const childText = child.text ? escapeGfm(sanitizeText(child.text), false) : "";
|
|
2113
|
+
lines.push(` ${childPrefix} ${childText}`);
|
|
2104
2114
|
}
|
|
2105
2115
|
}
|
|
2106
2116
|
continue;
|
|
2107
2117
|
}
|
|
2108
2118
|
if (block.type === "paragraph" && block.text) {
|
|
2109
|
-
|
|
2110
|
-
if (!
|
|
2111
|
-
if (/^\[별표\s*\d+/.test(
|
|
2119
|
+
const sanitized = sanitizeText(block.text);
|
|
2120
|
+
if (!sanitized) continue;
|
|
2121
|
+
if (/^\[별표\s*\d+/.test(sanitized)) {
|
|
2112
2122
|
const nextBlock = blocks[i + 1];
|
|
2123
|
+
const escapedSelf = escapeGfm(sanitized, false);
|
|
2113
2124
|
if (nextBlock?.type === "paragraph" && nextBlock.text && /관련\)?$/.test(nextBlock.text)) {
|
|
2114
|
-
|
|
2125
|
+
const nextEscaped = escapeGfm(sanitizeText(nextBlock.text), false);
|
|
2126
|
+
lines.push("", `## ${escapedSelf} ${nextEscaped}`, "");
|
|
2115
2127
|
i++;
|
|
2116
2128
|
} else {
|
|
2117
|
-
lines.push("", `## ${
|
|
2129
|
+
lines.push("", `## ${escapedSelf}`, "");
|
|
2118
2130
|
}
|
|
2119
2131
|
continue;
|
|
2120
2132
|
}
|
|
2121
|
-
if (/^\([^)]*조[^)]*관련\)$/.test(
|
|
2122
|
-
lines.push(`*${
|
|
2133
|
+
if (/^\([^)]*조[^)]*관련\)$/.test(sanitized)) {
|
|
2134
|
+
lines.push(`*${escapeGfm(sanitized, false)}*`, "");
|
|
2123
2135
|
continue;
|
|
2124
2136
|
}
|
|
2137
|
+
let text = escapeGfm(sanitized, false);
|
|
2125
2138
|
if (block.href) {
|
|
2126
2139
|
const href = sanitizeHref(block.href);
|
|
2127
2140
|
if (href) text = `[${text}](${href})`;
|
|
2128
2141
|
}
|
|
2129
2142
|
if (block.footnoteText) {
|
|
2130
|
-
text += ` (\uC8FC: ${block.footnoteText})`;
|
|
2143
|
+
text += ` (\uC8FC: ${escapeGfm(block.footnoteText, false)})`;
|
|
2131
2144
|
}
|
|
2132
2145
|
lines.push(text);
|
|
2133
2146
|
} else if (block.type === "table" && block.table) {
|
|
@@ -2152,13 +2165,13 @@ function tableToMarkdown(table) {
|
|
|
2152
2165
|
return content.split(/\n/).map((line) => {
|
|
2153
2166
|
const trimmed = line.trim();
|
|
2154
2167
|
if (!trimmed) return "";
|
|
2155
|
-
if (/^\d+\.\s/.test(trimmed)) return `**${trimmed}**`;
|
|
2156
|
-
if (/^[가-힣]\.\s/.test(trimmed)) return ` ${trimmed}`;
|
|
2157
|
-
return trimmed;
|
|
2168
|
+
if (/^\d+\.\s/.test(trimmed)) return `**${escapeGfm(trimmed, false)}**`;
|
|
2169
|
+
if (/^[가-힣]\.\s/.test(trimmed)) return ` ${escapeGfm(trimmed, false)}`;
|
|
2170
|
+
return escapeGfm(trimmed, false);
|
|
2158
2171
|
}).filter(Boolean).join("\n");
|
|
2159
2172
|
}
|
|
2160
2173
|
if (numCols === 1 && numRows >= 2) {
|
|
2161
|
-
return cells.map((row) => sanitizeText(row[0].text).replace(/\n/g, " ")).filter(Boolean).join("\n");
|
|
2174
|
+
return cells.map((row) => escapeGfm(sanitizeText(row[0].text).replace(/\n/g, " "), false)).filter(Boolean).join("\n");
|
|
2162
2175
|
}
|
|
2163
2176
|
const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
|
|
2164
2177
|
const skip = /* @__PURE__ */ new Set();
|
|
@@ -2167,7 +2180,7 @@ function tableToMarkdown(table) {
|
|
|
2167
2180
|
if (skip.has(`${r},${c}`)) continue;
|
|
2168
2181
|
const cell = cells[r]?.[c];
|
|
2169
2182
|
if (!cell) continue;
|
|
2170
|
-
display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
|
|
2183
|
+
display[r][c] = escapeGfm(sanitizeText(cell.text).replace(/\n/g, "<br>"), true);
|
|
2171
2184
|
for (let dr = 0; dr < cell.rowSpan; dr++) {
|
|
2172
2185
|
for (let dc = 0; dc < cell.colSpan; dc++) {
|
|
2173
2186
|
if (dr === 0 && dc === 0) continue;
|
|
@@ -2218,6 +2231,223 @@ var HEADING_RATIO_H1 = 1.5;
|
|
|
2218
2231
|
var HEADING_RATIO_H2 = 1.3;
|
|
2219
2232
|
var HEADING_RATIO_H3 = 1.15;
|
|
2220
2233
|
|
|
2234
|
+
// src/hwp5/equation.ts
|
|
2235
|
+
// Equation-script words (looked up lower-cased by convertWords) mapped to the
// LaTeX command that replaces them.
var WORD_COMMANDS = /* @__PURE__ */ new Map(Object.entries({
  // Greek letters
  alpha: "\\alpha",
  beta: "\\beta",
  gamma: "\\gamma",
  delta: "\\delta",
  epsilon: "\\epsilon",
  theta: "\\theta",
  lambda: "\\lambda",
  mu: "\\mu",
  pi: "\\pi",
  sigma: "\\sigma",
  tau: "\\tau",
  phi: "\\phi",
  omega: "\\omega",
  // Named functions
  sin: "\\sin",
  cos: "\\cos",
  tan: "\\tan",
  sec: "\\sec",
  csc: "\\csc",
  cot: "\\cot",
  log: "\\log",
  ln: "\\ln",
  lim: "\\lim",
  inf: "\\infty",
  // Big operators
  sum: "\\sum",
  smallsum: "\\sum",
  prod: "\\prod",
  int: "\\int",
  oint: "\\oint",
  // Arrows and symbols
  rightarrow: "\\rightarrow",
  leftarrow: "\\leftarrow",
  partial: "\\partial",
  nabla: "\\nabla",
  angle: "\\angle",
  triangle: "\\triangle",
  // Accents
  vec: "\\vec",
  bar: "\\overline",
  dot: "\\dot",
  hat: "\\hat",
  // Delimiter sizing
  left: "\\left",
  right: "\\right"
}));
|
|
2277
|
+
// Equation-script words mapped to LaTeX symbols. convertWords consults this
// table first with the word's exact spelling, then with its lower-cased form.
// `leq` is listed alongside `le` so both spellings of "less than or equal"
// convert, mirroring the existing `ge` / `geq` pair.
var SYMBOL_WORDS = /* @__PURE__ */ new Map([
  ["times", "\\times"],
  ["divide", "\\div"],
  ["div", "\\div"],
  ["le", "\\leq"],
  ["leq", "\\leq"],
  ["ge", "\\geq"],
  ["geq", "\\geq"],
  ["deg", "^\\circ"],
  ["rarrow", "\\rightarrow"],
  ["larrow", "\\leftarrow"],
  ["lrarrow", "\\leftrightarrow"],
  ["in", "\\in"],
  ["notin", "\\notin"],
  ["emptyset", "\\emptyset"],
  ["subset", "\\subset"],
  ["nsubset", "\\nsubseteq"],
  ["cup", "\\cup"],
  ["cap", "\\cap"],
  ["smallinter", "\\cap"],
  ["sim", "\\sim"],
  ["circ", "\\circ"],
  ["bot", "\\perp"],
  ["dyad", "\\overleftrightarrow"],
  ["arch", "\\overset{\\frown}"]
]);
|
|
2302
|
+
/**
 * Convert an equation script to LaTeX.
 *
 * NUL characters are removed and the script is trimmed before it is handed to
 * convertEquation at recursion depth 0.
 *
 * @param {string} equation - Raw equation script.
 * @returns {string} Whatever convertEquation returns for the cleaned script.
 */
function hwpEquationToLatex(equation) {
  const withoutNuls = equation.replace(/\0/g, "");
  const script = withoutNuls.trim();
  return convertEquation(script, 0);
}
|
|
2305
|
+
/**
 * Run the equation-to-LaTeX rewrite pipeline over one (sub)expression.
 *
 * Empty input, or a recursion depth above 12, is returned untouched. Otherwise
 * whitespace runs collapse to one space, runs of backticks or tildes become
 * `\,`, and the rewrite stages run in a fixed order.
 *
 * @param {string} equation - Expression to convert.
 * @param {number} depth - Current recursion depth (the top-level call passes 0).
 * @returns {string} The converted expression.
 */
function convertEquation(equation, depth) {
  if (!equation || depth > 12) return equation;
  const normalized = equation
    .replace(/\s+/g, " ")
    .replace(/`+/g, "\\,")
    .replace(/~+/g, "\\,")
    .trim();
  // Stage order is significant: each stage consumes the previous stage's output.
  const stages = [
    (text) => convertMatrixLike(text),
    (text) => convertRoots(text, depth),
    (text) => convertOver(text, depth),
    (text) => convertSqrt(text, depth),
    (text) => convertScripts(text),
    (text) => convertOperators(text),
    (text) => removeFontDirectives(text),
    (text) => convertWords(text),
    (text) => cleanupLatexSpacing(text)
  ];
  return stages.reduce((text, stage) => stage(text), normalized);
}
|
|
2319
|
+
/**
 * Rewrite `matrix{...}` and `cases{...}` groups as LaTeX environments.
 *
 * Only groups whose body contains no nested braces are matched. Inside the
 * body, `#` separates rows and is emitted as the LaTeX row break `\\`; any
 * `&` column separators in the body pass through unchanged. (Joining matrix
 * rows with `&` would flatten the whole matrix into a single row.)
 *
 * @param {string} input - Expression text.
 * @returns {string} Text with matrix/cases groups replaced.
 */
function convertMatrixLike(input) {
  // Split on the row separator and re-join with a LaTeX row break.
  const joinRows = (body) => body.split("#").map((row) => row.trim()).join(" \\\\ ");
  return input.replace(
    /\bmatrix\s*\{([^{}]*)\}/gi,
    (_match, body) => `\\begin{matrix} ${joinRows(body)} \\end{matrix}`
  ).replace(
    /\bcases\s*\{([^{}]*)\}/gi,
    (_match, body) => `\\begin{cases} ${joinRows(body)} \\end{cases}`
  );
}
|
|
2328
|
+
/**
 * Rewrite `root <degree> of <radicand>` as `\sqrt[degree]{radicand}`.
 *
 * Each operand is either a brace group without nested braces or a run of
 * non-whitespace characters. Both operands are unwrapped and converted
 * recursively one level deeper. A `root` directly preceded by a backslash is
 * left alone.
 *
 * @param {string} input - Expression text.
 * @param {number} depth - Current recursion depth.
 * @returns {string} Text with root expressions replaced.
 */
function convertRoots(input, depth) {
  const rootPattern = /(?<!\\)\broot\s+({[^{}]*}|\S+)\s+of\s+({[^{}]*}|\S+)/gi;
  const convertOperand = (operand) => convertEquation(unwrapGroup(operand), depth + 1);
  return input.replace(rootPattern, (_whole, degree, radicand) => {
    const index = convertOperand(degree);
    const body = convertOperand(radicand);
    return "\\sqrt[" + index + "]{" + body + "}";
  });
}
|
|
2333
|
+
/**
 * Rewrite `sqrt <operand>` as `\sqrt{operand}`.
 *
 * The operand is a brace group without nested braces or a run of
 * non-whitespace characters; it is unwrapped and converted recursively one
 * level deeper. A `sqrt` directly preceded by a backslash is left alone.
 *
 * @param {string} input - Expression text.
 * @param {number} depth - Current recursion depth.
 * @returns {string} Text with sqrt expressions replaced.
 */
function convertSqrt(input, depth) {
  const sqrtPattern = /(?<!\\)\bsqrt\s*({[^{}]*}|\S+)/gi;
  return input.replace(sqrtPattern, (_whole, radicand) => {
    const body = convertEquation(unwrapGroup(radicand), depth + 1);
    return "\\sqrt{" + body + "}";
  });
}
|
|
2338
|
+
/**
 * Rewrite `<left> over <right>` as `\frac{left}{right}`.
 *
 * Repeatedly finds the first `over` that is outside all braces/parentheses,
 * reads the atom on each side, converts both recursively and splices the
 * fraction in. Stops after 50 rewrites, when no `over` remains, or when either
 * side cannot be read.
 *
 * @param {string} input - Expression text.
 * @param {number} depth - Current recursion depth.
 * @returns {string} Text with `over` expressions replaced.
 */
function convertOver(input, depth) {
  const KEYWORD = "over";
  const MAX_REWRITES = 50;
  let text = input;
  let rewrites = 0;
  while (rewrites < MAX_REWRITES) {
    const keywordAt = findTopLevelWord(text, KEYWORD);
    if (keywordAt < 0) return text;
    const lhs = readLeftAtom(text, keywordAt);
    const rhs = readRightAtom(text, keywordAt + KEYWORD.length);
    if (!lhs || !rhs) return text;
    const numerator = convertEquation(unwrapGroup(lhs.atom), depth + 1);
    const denominator = convertEquation(unwrapGroup(rhs.atom), depth + 1);
    const before = text.slice(0, lhs.start);
    const after = text.slice(rhs.end);
    text = before + `\\frac{${numerator}}{${denominator}}` + after;
    rewrites += 1;
  }
  return text;
}
|
|
2352
|
+
/**
 * Normalize superscripts and subscripts.
 *
 * Whitespace around `^` and `_` is removed, then any script argument that is
 * not already a brace group is wrapped in braces. The argument runs up to the
 * next whitespace, brace, `_` or `^`.
 *
 * @param {string} input - Expression text.
 * @returns {string} Text with braced script arguments.
 */
function convertScripts(input) {
  const tightCarets = input.replace(/\s*\^\s*/g, "^");
  const tightScripts = tightCarets.replace(/\s*_\s*/g, "_");
  const bracedSupers = tightScripts.replace(/\^(?!\{)([^\s{}_^]+)/g, "^{$1}");
  return bracedSupers.replace(/_(?!\{)([^\s{}_^]+)/g, "_{$1}");
}
|
|
2355
|
+
/**
 * Replace ASCII operator digraphs and a few Unicode symbols with LaTeX commands.
 *
 * Every replacement ends with a space so the command name cannot fuse with a
 * following letter (`a+-b` must become `a\pm b`, not `a\pmb`, which is a
 * different LaTeX command). Surplus spaces are collapsed later by
 * cleanupLatexSpacing.
 *
 * @param {string} input - Expression text.
 * @returns {string} Text with operators replaced.
 */
function convertOperators(input) {
  // Applied in order; `+-` must be tried before `-+`, and `!=` before `==`.
  const rewrites = [
    [/\+-/g, "\\pm "],
    [/-\+/g, "\\mp "],
    [/\/\//g, "\\parallel "],
    [/△/g, "\\triangle "],
    [/□/g, "\\square "],
    [/‧/g, "\\cdot "],
    [/!=/g, "\\neq "],
    [/<=/g, "\\leq "],
    [/>=/g, "\\geq "],
    [/==/g, "\\equiv "]
  ];
  return rewrites.reduce((text, [pattern, latex]) => text.replace(pattern, latex), input);
}
|
|
2358
|
+
/**
 * Strip the standalone words `rm` and `it` (any letter case) together with the
 * whitespace that follows them. Occurrences directly preceded by a backslash
 * are kept.
 *
 * @param {string} input - Expression text.
 * @returns {string} Text without the font directive words.
 */
function removeFontDirectives(input) {
  const fontDirective = /(?<!\\)\b(?:rm|it)\b\s*/gi;
  return input.replace(fontDirective, "");
}
|
|
2361
|
+
/**
 * Replace known equation words with their LaTeX equivalents.
 *
 * A word is a letter followed by letters/digits that is not adjacent to
 * another letter or digit and is not preceded by a backslash. Lookup order:
 * SYMBOL_WORDS by exact spelling, then SYMBOL_WORDS and WORD_COMMANDS by
 * lower-cased spelling. Unknown words are kept as written.
 *
 * @param {string} input - Expression text.
 * @returns {string} Text with known words replaced.
 */
function convertWords(input) {
  const wordPattern = /(?<![\\A-Za-z0-9])([A-Za-z][A-Za-z0-9]*)(?![A-Za-z0-9])/g;
  return input.replace(wordPattern, (word) => {
    const exactHit = SYMBOL_WORDS.get(word);
    if (exactHit) return exactHit;
    const folded = word.toLowerCase();
    for (const table of [SYMBOL_WORDS, WORD_COMMANDS]) {
      const hit = table.get(folded);
      if (hit !== undefined && hit !== null) return hit;
    }
    return word;
  });
}
|
|
2369
|
+
/**
 * Tidy spacing in generated LaTeX.
 *
 * In order: attach `\left` / `\right` to the delimiter that follows (braces
 * become `\{` / `\}`), drop whitespace around `\,`, collapse whitespace runs,
 * remove whitespace just inside braces, and trim the result.
 *
 * @param {string} input - LaTeX text.
 * @returns {string} The tidied text.
 */
function cleanupLatexSpacing(input) {
  let text = input;
  text = text.replace(/\\left\s*\{/g, "\\left\\{");
  text = text.replace(/\\right\s*\}/g, "\\right\\}");
  text = text.replace(/\\left\s*([\[\]\(\)\|])/g, "\\left$1");
  text = text.replace(/\\right\s*([\[\]\(\)\|])/g, "\\right$1");
  text = text.replace(/\s*\\,\s*/g, "\\,");
  text = text.replace(/\s+/g, " ");
  text = text.replace(/\{\s+/g, "{");
  text = text.replace(/\s+\}/g, "}");
  return text.trim();
}
|
|
2372
|
+
/**
 * Find the first occurrence of `word` that lies outside all `{}` and `()`
 * groups and is not glued to a neighbouring word character.
 *
 * The comparison lower-cases the input slice, so `word` is expected in lower
 * case. Unbalanced closers never push a nesting counter below zero.
 *
 * @param {string} input - Text to scan.
 * @param {string} word - Word to look for.
 * @returns {number} Index of the match, or -1 when there is none.
 */
function findTopLevelWord(input, word) {
  const lastStart = input.length - word.length;
  let braceDepth = 0;
  let parenDepth = 0;
  for (let at = 0; at <= lastStart; at += 1) {
    switch (input[at]) {
      case "{":
        braceDepth += 1;
        break;
      case "}":
        braceDepth = Math.max(0, braceDepth - 1);
        break;
      case "(":
        parenDepth += 1;
        break;
      case ")":
        parenDepth = Math.max(0, parenDepth - 1);
        break;
      default:
        break;
    }
    const nested = braceDepth !== 0 || parenDepth !== 0;
    if (nested) continue;
    const candidate = input.slice(at, at + word.length).toLowerCase();
    if (candidate !== word) continue;
    const gluedBefore = isWordChar(input[at - 1]);
    const glued = gluedBefore || isWordChar(input[at + word.length]);
    if (!glued) return at;
  }
  return -1;
}
|
|
2388
|
+
/**
 * Read the atom that ends just before index `end`, skipping trailing
 * whitespace.
 *
 * If the atom ends with `}` or `)` and a matching opener exists, the whole
 * bracketed group is the atom. Otherwise the atom extends leftwards until
 * whitespace or one of `+ - = < >`.
 *
 * @param {string} input - Text to read from.
 * @param {number} end - Index just past the region to read.
 * @returns {{start: number, atom: string} | null} Atom and its start index, or
 *   null when only whitespace precedes `end`.
 */
function readLeftAtom(input, end) {
  let last = end - 1;
  while (last >= 0 && /\s/.test(input[last])) last -= 1;
  if (last < 0) return null;
  for (const [opener, closer] of [["{", "}"], ["(", ")"]]) {
    if (input[last] !== closer) continue;
    const groupStart = findMatchingLeft(input, last, opener, closer);
    if (groupStart >= 0) {
      return { start: groupStart, atom: input.slice(groupStart, last + 1) };
    }
  }
  // Fallback for bare tokens and for unmatched closers.
  let cursor = last;
  while (cursor >= 0 && !/[\s+\-=<>]/.test(input[cursor])) cursor -= 1;
  const first = cursor + 1;
  return { start: first, atom: input.slice(first, last + 1) };
}
|
|
2404
|
+
/**
 * Read the atom that begins at or after index `start`, skipping leading
 * whitespace.
 *
 * If the atom begins with `{` or `(` and a matching closer exists, the whole
 * bracketed group is the atom. Otherwise the atom extends rightwards until
 * whitespace or one of `+ - = < >`.
 *
 * @param {string} input - Text to read from.
 * @param {number} start - Index at which to begin reading.
 * @returns {{end: number, atom: string} | null} Atom and the index just past
 *   it, or null when only whitespace follows `start`.
 */
function readRightAtom(input, start) {
  let first = start;
  while (first < input.length && /\s/.test(input[first])) first += 1;
  if (first >= input.length) return null;
  for (const [opener, closer] of [["{", "}"], ["(", ")"]]) {
    if (input[first] !== opener) continue;
    const groupEnd = findMatchingRight(input, first, opener, closer);
    if (groupEnd >= 0) {
      return { end: groupEnd + 1, atom: input.slice(first, groupEnd + 1) };
    }
  }
  // Fallback for bare tokens and for unmatched openers.
  let stop = first;
  while (stop < input.length && !/[\s+\-=<>]/.test(input[stop])) stop += 1;
  return { end: stop, atom: input.slice(first, stop) };
}
|
|
2420
|
+
/**
 * Scan leftwards from `closeIndex` for the opener that balances it.
 *
 * @param {string} input - Text to scan.
 * @param {number} closeIndex - Index where the scan starts.
 * @param {string} open - Opening delimiter character.
 * @param {string} close - Closing delimiter character.
 * @returns {number} Index of the balancing opener, or -1 if none is found.
 */
function findMatchingLeft(input, closeIndex, open, close) {
  let pending = 0;
  let at = closeIndex;
  while (at >= 0) {
    const ch = input[at];
    if (ch === close) {
      pending += 1;
    } else if (ch === open) {
      pending -= 1;
      if (pending === 0) return at;
    }
    at -= 1;
  }
  return -1;
}
|
|
2431
|
+
/**
 * Scan rightwards from `openIndex` for the closer that balances it.
 *
 * @param {string} input - Text to scan.
 * @param {number} openIndex - Index where the scan starts.
 * @param {string} open - Opening delimiter character.
 * @param {string} close - Closing delimiter character.
 * @returns {number} Index of the balancing closer, or -1 if none is found.
 */
function findMatchingRight(input, openIndex, open, close) {
  let pending = 0;
  let at = openIndex;
  while (at < input.length) {
    const ch = input[at];
    if (ch === open) {
      pending += 1;
    } else if (ch === close) {
      pending -= 1;
      if (pending === 0) return at;
    }
    at += 1;
  }
  return -1;
}
|
|
2442
|
+
/**
 * Trim `input` and strip one pair of enclosing braces, if present.
 *
 * The braces are removed only when the leading `{` is balanced by the final
 * `}`. Text such as `{a}+{b}` starts and ends with braces that belong to
 * different groups, so it is returned as-is rather than being cut to
 * `a}+{b`. Text whose leading brace never closes is also returned unchanged.
 *
 * @param {string} input - Text that may be a brace group.
 * @returns {string} The trimmed text, without its enclosing braces when they
 *   form one group.
 */
function unwrapGroup(input) {
  const trimmed = input.trim();
  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return trimmed;
  let depth = 0;
  for (let i = 0; i < trimmed.length; i++) {
    const ch = trimmed[i];
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        // The leading brace closes here; unwrap only if this is the last char.
        return i === trimmed.length - 1 ? trimmed.slice(1, -1) : trimmed;
      }
    }
  }
  return trimmed;
}
|
|
2447
|
+
/**
 * Tell whether `ch` contains an ASCII letter, digit or underscore.
 *
 * @param {string | undefined} ch - Character to test; falsy values (including
 *   the `undefined` produced by out-of-range indexing) yield false.
 * @returns {boolean} True for a word character.
 */
function isWordChar(ch) {
  if (!ch) return false;
  return /[A-Za-z0-9_]/.test(ch);
}
|
|
2450
|
+
|
|
2221
2451
|
// src/hwpx/parser.ts
|
|
2222
2452
|
var MAX_DECOMPRESS_SIZE = 500 * 1024 * 1024;
|
|
2223
2453
|
var MAX_ZIP_ENTRIES = 2e3;
|
|
@@ -2910,6 +3140,17 @@ function findDescendant(node, targetTag, depth = 0) {
|
|
|
2910
3140
|
}
|
|
2911
3141
|
return null;
|
|
2912
3142
|
}
|
|
3143
|
+
/**
 * Return the first direct element child whose tag name, with any `prefix:`
 * removed, equals `targetTag`.
 *
 * Only children with `nodeType === 1` are considered. The name is taken from
 * `tagName`, falling back to `localName`.
 *
 * @param {object} node - Parent node exposing an indexable `childNodes`.
 * @param {string} targetTag - Tag name to match, without a prefix.
 * @returns {object | null} The matching child, or null when there is none or
 *   the node has no `childNodes`.
 */
function findChildByLocalName(node, targetTag) {
  const { childNodes } = node;
  if (!childNodes) return null;
  const ELEMENT_NODE = 1;
  for (let index = 0; index < childNodes.length; index += 1) {
    const candidate = childNodes[index];
    if (candidate.nodeType === ELEMENT_NODE) {
      const qualifiedName = candidate.tagName || candidate.localName || "";
      const bareName = qualifiedName.replace(/^[^:]+:/, "");
      if (bareName === targetTag) return candidate;
    }
  }
  return null;
}
|
|
2913
3154
|
function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
|
|
2914
3155
|
const children = drawTextNode.childNodes;
|
|
2915
3156
|
if (!children) return;
|
|
@@ -3012,6 +3253,22 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
3012
3253
|
case "shapeComment":
|
|
3013
3254
|
case "drawText":
|
|
3014
3255
|
break;
|
|
3256
|
+
// 수식: <hp:equation> 내부의 <hp:script>에 HML/HULK-style 수식 본문이
|
|
3257
|
+
// 들어있음. hwpEquationToLatex로 LaTeX 변환 후 `$...$`로 래핑하여
|
|
3258
|
+
// 본문 텍스트에 인라인 삽입. 변환 실패/빈 결과는 조용히 드롭
|
|
3259
|
+
// (대체 텍스트 "수식입니다." 누출 방지는 기존 정규식이 처리).
|
|
3260
|
+
case "equation": {
|
|
3261
|
+
const script = findChildByLocalName(child, "script");
|
|
3262
|
+
const raw = script ? extractTextFromNode(script) : "";
|
|
3263
|
+
if (raw.trim()) {
|
|
3264
|
+
try {
|
|
3265
|
+
const latex = hwpEquationToLatex(raw).trim();
|
|
3266
|
+
if (latex) text += " $" + latex.replace(/\$/g, "\\$") + "$ ";
|
|
3267
|
+
} catch {
|
|
3268
|
+
}
|
|
3269
|
+
}
|
|
3270
|
+
break;
|
|
3271
|
+
}
|
|
3015
3272
|
// run 요소에서 charPrIDRef 추출
|
|
3016
3273
|
case "r": {
|
|
3017
3274
|
const runCharPr = child.getAttribute("charPrIDRef");
|
|
@@ -3078,8 +3335,13 @@ var TAG_CHAR_SHAPE = 68;
|
|
|
3078
3335
|
var TAG_CTRL_HEADER = 71;
|
|
3079
3336
|
var TAG_LIST_HEADER = 72;
|
|
3080
3337
|
var TAG_TABLE = 77;
|
|
3081
|
-
var
|
|
3082
|
-
var
|
|
3338
|
+
var TAG_EQEDIT = 88;
|
|
3339
|
+
var HWPTAG_BEGIN = 16;
|
|
3340
|
+
var TAG_ID_MAPPINGS = HWPTAG_BEGIN + 1;
|
|
3341
|
+
var TAG_FACE_NAME = HWPTAG_BEGIN + 3;
|
|
3342
|
+
var TAG_DOC_CHAR_SHAPE = HWPTAG_BEGIN + 5;
|
|
3343
|
+
var TAG_DOC_PARA_SHAPE = HWPTAG_BEGIN + 9;
|
|
3344
|
+
var TAG_DOC_STYLE = HWPTAG_BEGIN + 10;
|
|
3083
3345
|
var CHAR_LINE = 0;
|
|
3084
3346
|
var CHAR_SECTION_BREAK = 10;
|
|
3085
3347
|
var CHAR_PARA = 13;
|
|
@@ -3237,6 +3499,15 @@ function extractText(data) {
|
|
|
3237
3499
|
}
|
|
3238
3500
|
return result;
|
|
3239
3501
|
}
|
|
3502
|
+
/**
 * Decode the equation script stored in a record payload.
 *
 * Reads a little-endian 16-bit count at byte offset 4 and decodes that many
 * 2-byte units, starting at offset 6, as UTF-16LE. NUL characters are removed
 * and the text is trimmed.
 * NOTE(review): the first four bytes are skipped without being read —
 * presumably a property field; confirm against the record format spec.
 *
 * @param {Buffer} data - Record payload.
 * @returns {string | null} The script, or null when the payload is shorter
 *   than 6 bytes, the count is zero, the script would run past the end of the
 *   payload, or the decoded text is empty.
 */
function extractEquationText(data) {
  const HEADER_BYTES = 6;
  if (data.length < HEADER_BYTES) return null;
  const unitCount = data.readUInt16LE(4);
  const endOffset = HEADER_BYTES + unitCount * 2;
  const invalid = unitCount <= 0 || endOffset > data.length;
  if (invalid) return null;
  const decoded = data.subarray(HEADER_BYTES, endOffset).toString("utf16le");
  const script = decoded.replace(/\0+/g, "").trim();
  return script ? script : null;
}
|
|
3240
3511
|
|
|
3241
3512
|
// src/hwp5/aes.ts
|
|
3242
3513
|
var S_BOX = new Uint8Array([
|
|
@@ -4408,6 +4679,26 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
4408
4679
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
4409
4680
|
}
|
|
4410
4681
|
var TAG_SHAPE_COMPONENT = 74;
|
|
4682
|
+
// Control id of an equation control as it appears in this parser.
var CTRL_ID_EQEDIT = "deqe";
/**
 * Tell whether a control id denotes an equation control. Both `"deqe"` and
 * its reversed spelling `"eqed"` are accepted.
 *
 * @param {string} ctrlId - Four-character control id.
 * @returns {boolean} True for an equation control id.
 */
function isEquationControlId(ctrlId) {
  const equationIds = [CTRL_ID_EQEDIT, "eqed"];
  return equationIds.includes(ctrlId);
}
|
|
4686
|
+
/**
 * Convert an equation script to LaTeX and wrap it in `$...$`.
 *
 * Any `$` inside the converted LaTeX is backslash-escaped so it cannot close
 * the wrapper early.
 *
 * @param {string} equation - Raw equation script.
 * @returns {string} `$latex$`, or an empty string when conversion yields
 *   nothing.
 */
function formatEquationForMarkdown(equation) {
  const latex = hwpEquationToLatex(equation);
  if (!latex) return "";
  const escaped = latex.replace(/\$/g, "\\$");
  return "$" + escaped + "$";
}
|
|
4691
|
+
/**
 * Find the equation record that belongs to the control at `ctrlIdx` and
 * return it formatted for Markdown.
 *
 * Looks at no more than the 9 records following the control and stops at the
 * first record whose level is not deeper than the control's. The first record
 * tagged TAG_EQEDIT decides the result.
 *
 * @param {Array<{level: number, tagId: number, data: Buffer}>} records - Record list.
 * @param {number} ctrlIdx - Index of the control header record.
 * @returns {string | null} The formatted equation, or null when no equation
 *   record is found or its script cannot be extracted.
 */
function extractEquationFromControl(records, ctrlIdx) {
  const { level: ctrlLevel } = records[ctrlIdx];
  const scanEnd = Math.min(records.length, ctrlIdx + 10);
  for (let idx = ctrlIdx + 1; idx < scanEnd; idx += 1) {
    const record = records[idx];
    // Reaching a record at or above the control's level means we left its subtree.
    if (record.level <= ctrlLevel) return null;
    if (record.tagId === TAG_EQEDIT) {
      const script = extractEquationText(record.data);
      if (!script) return null;
      return formatEquationForMarkdown(script);
    }
  }
  return null;
}
|
|
4411
4702
|
function extractBinDataId(records, ctrlIdx) {
|
|
4412
4703
|
const ctrlLevel = records[ctrlIdx].level;
|
|
4413
4704
|
for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 50; j++) {
|
|
@@ -4567,6 +4858,16 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
4567
4858
|
}
|
|
4568
4859
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
4569
4860
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
4861
|
+
} else if (isEquationControlId(ctrlId)) {
|
|
4862
|
+
const equation = extractEquationFromControl(records, i);
|
|
4863
|
+
if (equation) {
|
|
4864
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
4865
|
+
if (lastBlock && lastBlock.type === "paragraph" && lastBlock.text) {
|
|
4866
|
+
lastBlock.text = lastBlock.text + " " + equation;
|
|
4867
|
+
} else {
|
|
4868
|
+
blocks.push({ type: "paragraph", text: equation, pageNumber: sectionNum });
|
|
4869
|
+
}
|
|
4870
|
+
}
|
|
4570
4871
|
} else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
|
|
4571
4872
|
const noteText = extractNoteText(records, i);
|
|
4572
4873
|
if (noteText && blocks.length > 0) {
|
|
@@ -4599,6 +4900,13 @@ function extractNoteText(records, ctrlIdx) {
|
|
|
4599
4900
|
const t = extractText(r.data).trim();
|
|
4600
4901
|
if (t) texts.push(t);
|
|
4601
4902
|
}
|
|
4903
|
+
if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
|
|
4904
|
+
const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
|
|
4905
|
+
if (isEquationControlId(innerCtrlId)) {
|
|
4906
|
+
const equation = extractEquationFromControl(records, j);
|
|
4907
|
+
if (equation) texts.push(equation);
|
|
4908
|
+
}
|
|
4909
|
+
}
|
|
4602
4910
|
}
|
|
4603
4911
|
return texts.length > 0 ? texts.join(" ") : null;
|
|
4604
4912
|
}
|
|
@@ -4612,6 +4920,13 @@ function extractTextBoxText(records, ctrlIdx) {
|
|
|
4612
4920
|
const t = extractText(r.data).trim();
|
|
4613
4921
|
if (t) texts.push(t);
|
|
4614
4922
|
}
|
|
4923
|
+
if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
|
|
4924
|
+
const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
|
|
4925
|
+
if (isEquationControlId(innerCtrlId)) {
|
|
4926
|
+
const equation = extractEquationFromControl(records, j);
|
|
4927
|
+
if (equation) texts.push(equation);
|
|
4928
|
+
}
|
|
4929
|
+
}
|
|
4615
4930
|
}
|
|
4616
4931
|
return texts.length > 0 ? texts.join("\n") : null;
|
|
4617
4932
|
}
|
|
@@ -4680,6 +4995,12 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
4680
4995
|
i = nextIdx;
|
|
4681
4996
|
continue;
|
|
4682
4997
|
}
|
|
4998
|
+
if (isEquationControlId(ctrlId)) {
|
|
4999
|
+
const equation = extractEquationFromControl(records, i);
|
|
5000
|
+
if (equation) {
|
|
5001
|
+
text = text ? text + " " + equation : equation;
|
|
5002
|
+
}
|
|
5003
|
+
}
|
|
4683
5004
|
}
|
|
4684
5005
|
i++;
|
|
4685
5006
|
}
|
|
@@ -6686,7 +7007,7 @@ function mergeKoreanLines(text) {
|
|
|
6686
7007
|
}
|
|
6687
7008
|
|
|
6688
7009
|
// src/index.ts
|
|
6689
|
-
import { readFile
|
|
7010
|
+
import { readFile } from "fs/promises";
|
|
6690
7011
|
|
|
6691
7012
|
// src/xlsx/parser.ts
|
|
6692
7013
|
import JSZip3 from "jszip";
|
|
@@ -9821,504 +10142,6 @@ async function markdownToXlsx(markdown, options) {
|
|
|
9821
10142
|
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
|
9822
10143
|
}
|
|
9823
10144
|
|
|
9824
|
-
// src/convert/index.ts
|
|
9825
|
-
import { readFile } from "fs/promises";
|
|
9826
|
-
|
|
9827
|
-
// src/convert/libreoffice.ts
|
|
9828
|
-
import libre from "libreoffice-convert";
|
|
9829
|
-
|
|
9830
|
-
// src/convert/error.ts
|
|
9831
|
-
/**
 * Error raised by the conversion subsystem. Alongside the usual message it
 * carries a `code` identifying the failure kind.
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code - Failure code stored on the instance.
   * @param {string} message - Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Assignment order is kept so own-property enumeration order is unchanged.
    this.code = code;
    this.name = "ConvertError";
  }
};
|
|
9838
|
-
|
|
9839
|
-
// src/convert/installer.ts
|
|
9840
|
-
import { homedir } from "os";
|
|
9841
|
-
import { join as join2, delimiter } from "path";
|
|
9842
|
-
import { mkdir, access, symlink, rm } from "fs/promises";
|
|
9843
|
-
import { createWriteStream } from "fs";
|
|
9844
|
-
import { spawn } from "child_process";
|
|
9845
|
-
var installInFlight = null;
|
|
9846
|
-
var CACHE_DIR = join2(homedir(), ".cache", "kordoc", "libreoffice");
|
|
9847
|
-
var VERSION_FILE = join2(CACHE_DIR, "version");
|
|
9848
|
-
var PACKAGES = {
|
|
9849
|
-
darwin: {
|
|
9850
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/mac/x86_64/LibreOffice_26.2.3_MacOS_x86-64.dmg",
|
|
9851
|
-
binPath: "LibreOffice.app/Contents/MacOS/soffice",
|
|
9852
|
-
sizeMb: 300
|
|
9853
|
-
},
|
|
9854
|
-
linux: {
|
|
9855
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/deb/x86_64/LibreOffice_26.2.3_Linux_x86-64_deb.tar.gz",
|
|
9856
|
-
binPath: "opt/libreoffice26.2/program/soffice",
|
|
9857
|
-
sizeMb: 210
|
|
9858
|
-
},
|
|
9859
|
-
win32: {
|
|
9860
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/win/x86_64/LibreOffice_26.2.3_Win_x86-64.msi",
|
|
9861
|
-
binPath: "LibreOffice/program/soffice.exe",
|
|
9862
|
-
sizeMb: 360
|
|
9863
|
-
}
|
|
9864
|
-
};
|
|
9865
|
-
/**
 * Probe for `soffice` by running `soffice --version`.
 *
 * @returns {Promise<string | null>} `"soffice"` when the probe exits with code
 *   0; null when it exits with another code or cannot be spawned.
 */
async function findInPath() {
  return new Promise((settle) => {
    const probe = spawn("soffice", ["--version"], { stdio: "ignore" });
    probe.on("error", () => {
      settle(null);
    });
    probe.on("close", (exitCode) => {
      settle(exitCode === 0 ? "soffice" : null);
    });
  });
}
|
|
9872
|
-
/**
 * Look for `bin/soffice` under CACHE_DIR.
 *
 * @returns {Promise<string | null>} The path when `access` succeeds on it,
 *   otherwise null.
 */
async function findInCache() {
  const candidate = join2(CACHE_DIR, "bin", "soffice");
  return access(candidate).then(
    () => candidate,
    () => null
  );
}
|
|
9881
|
-
/**
 * Check a fixed list of per-platform install locations for `soffice`.
 *
 * Candidates are tried in order and the first one that `access` accepts wins.
 * On Windows the `ProgramFiles` and `ProgramFiles(x86)` environment variables
 * are used, with the conventional `C:\` directories as fallbacks.
 *
 * @returns {Promise<string | null>} The first accessible path, or null when
 *   none is accessible or the platform has no candidates.
 */
async function findInDefaultPaths() {
  const candidates = [];
  switch (process.platform) {
    case "darwin":
      candidates.push(
        "/Applications/LibreOffice.app/Contents/MacOS/soffice",
        "/opt/homebrew/bin/soffice",
        "/usr/local/bin/soffice"
      );
      break;
    case "linux":
      candidates.push(
        "/usr/bin/soffice",
        "/usr/lib/libreoffice/program/soffice"
      );
      break;
    case "win32": {
      const programFiles = process.env["ProgramFiles"] ?? "C:\\Program Files";
      const programFilesX86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
      candidates.push(
        join2(programFiles, "LibreOffice", "program", "soffice.exe"),
        join2(programFilesX86, "LibreOffice", "program", "soffice.exe")
      );
      break;
    }
    default:
      break;
  }
  for (const candidate of candidates) {
    const reachable = await access(candidate).then(
      () => true,
      () => false
    );
    if (reachable) return candidate;
  }
  return null;
}
|
|
9913
|
-
/**
 * Stream `url` into the file `dest`, reporting progress after each chunk.
 *
 * Waits for the write stream's `drain` event whenever `write` signals
 * back-pressure. The reader lock is released and the file is ended whether or
 * not the transfer succeeds.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Destination file path.
 * @param {number} totalBytes - Total size handed through to `onProgress`.
 * @param {(downloaded: number, total: number) => void} [onProgress] - Called
 *   after each written chunk with the running byte count.
 * @returns {Promise<void>}
 * @throws {Error} When the response status is not OK or the response has no body.
 */
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
  const response = await fetch(url);
  if (!response.ok) throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status} (${url})`);
  if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
  const file = createWriteStream(dest);
  const reader = response.body.getReader();
  let received = 0;
  try {
    for (let chunk = await reader.read(); !chunk.done; chunk = await reader.read()) {
      const accepted = file.write(chunk.value);
      if (!accepted) {
        // Back-pressure: pause until the stream has flushed its buffer.
        await new Promise((resume) => file.once("drain", resume));
      }
      received += chunk.value.length;
      onProgress?.(received, totalBytes);
    }
  } finally {
    reader.releaseLock();
    await new Promise((resolve2, reject) => {
      file.end((err) => err ? reject(err) : resolve2());
    });
  }
}
|
|
9937
|
-
/**
 * Download the LibreOffice package and install it with the installer for the
 * current platform.
 *
 * The platform is checked before anything is downloaded, so an unsupported
 * platform fails fast with a ConvertError. The download runs inside the same
 * try block as the install step, so a failed or interrupted download also has
 * its partial file removed.
 *
 * @param {{url: string, sizeMb: number, binPath: string}} pkg - Package
 *   descriptor for the current platform.
 * @param {(downloaded: number, total: number) => void} [onProgress] - Download
 *   progress callback.
 * @returns {Promise<*>} Whatever the platform installer resolves with.
 * @throws {ConvertError} With code `UNSUPPORTED_PLATFORM` when no installer
 *   exists for `process.platform`.
 */
async function installForPlatform(pkg, onProgress) {
  const platform = process.platform;
  const installers = new Map([
    ["darwin", installMacOS],
    ["linux", installLinux],
    ["win32", installWindows]
  ]);
  const install = installers.get(platform);
  if (!install) {
    throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
  }
  await mkdir(CACHE_DIR, { recursive: true });
  const downloadPath = join2(CACHE_DIR, `download-${Date.now()}`);
  try {
    await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
    return await install(pkg, downloadPath);
  } catch (err) {
    // Remove the (possibly partial) download, then surface the original error.
    await rm(downloadPath, { force: true });
    throw err;
  }
}
|
|
9956
|
-
/**
 * Install LibreOffice from a downloaded .dmg: mount it, copy
 * `LibreOffice.app` into CACHE_DIR, detach the image, delete the download and
 * link the binary.
 *
 * Every spawned process has an `error` listener, so a failure to start
 * `hdiutil` or `cp` rejects the step instead of surfacing as an unhandled
 * `error` event. Detaching is best-effort and never rejects.
 *
 * @param {{binPath: string}} pkg - Package descriptor; `binPath` is resolved
 *   against CACHE_DIR.
 * @param {string} downloadPath - Path of the downloaded .dmg.
 * @returns {Promise<*>} Whatever createSymlink resolves with.
 * @throws {Error} When mounting or copying fails.
 */
async function installMacOS(pkg, downloadPath) {
  const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
  await new Promise((resolve2, reject) => {
    const stderr = [];
    const child = spawn("hdiutil", ["attach", "-nobrowse", "-noverify", "-mountpoint", mountPoint, downloadPath]);
    child.stderr?.on("data", (d) => stderr.push(d.toString()));
    child.on("error", reject);
    child.on(
      "close",
      (code) => code === 0 ? resolve2() : reject(new Error(`dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328 (code=${code}): ${stderr.join("").trim()}`))
    );
  });
  try {
    const appSource = join2(mountPoint, "LibreOffice.app");
    const appDest = join2(CACHE_DIR, "LibreOffice.app");
    await new Promise((resolve2, reject) => {
      const child = spawn("cp", ["-R", appSource, appDest]);
      child.on("error", reject);
      child.on("close", (code) => code === 0 ? resolve2() : reject(new Error(".app \uBCF5\uC0AC \uC2E4\uD328")));
    });
  } finally {
    // Always try to detach; a detach failure must not mask the copy result.
    await new Promise((resolve2) => {
      const child = spawn("hdiutil", ["detach", mountPoint]);
      child.on("error", () => resolve2());
      child.on("close", () => resolve2());
    });
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(join2(CACHE_DIR, pkg.binPath));
}
|
|
9983
|
-
/**
 * Installs LibreOffice on Linux from a downloaded tarball.
 *
 * Extracts the archive into a scratch directory, unpacks every `.deb` found in
 * its `DEBS` folder into CACHE_DIR with `dpkg-deb -x`, removes the scratch
 * directory and the download, then exposes the binary through createSymlink().
 *
 * @param {{ binPath: string }} pkg - Package descriptor; `binPath` is joined onto CACHE_DIR.
 * @param {string} downloadPath - Path of the downloaded `.tar.gz` archive.
 * @returns {Promise<string>} Whatever createSymlink() resolves to.
 * @throws {Error} When `tar` fails or cannot be spawned.
 */
async function installLinux(pkg, downloadPath) {
  const extractDir = join2(CACHE_DIR, `extract-${Date.now()}`);
  await mkdir(extractDir, { recursive: true });
  try {
    await new Promise((resolve2, reject) => {
      const child = spawn("tar", ["xzf", downloadPath, "-C", extractDir]);
      // Without an 'error' listener a spawn failure is thrown as an unhandled
      // event instead of rejecting this promise.
      child.on("error", reject);
      child.on("close", (code) => code === 0 ? resolve2() : reject(new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328")));
    });
    const debsDir = join2(extractDir, "DEBS");
    try {
      await access(debsDir);
      const entries = await (await import("fs/promises")).readdir(debsDir);
      for (const entry of entries) {
        if (entry.endsWith(".deb")) {
          await new Promise((resolve2, reject) => {
            const child = spawn("dpkg-deb", ["-x", join2(debsDir, entry), CACHE_DIR]);
            child.on("error", reject);
            child.on("close", (code) => code === 0 ? resolve2() : reject(new Error(`${entry} \uCD94\uCD9C \uC2E4\uD328`)));
          });
        }
      }
    } catch {
      // Kept from the original: a missing DEBS folder or a failed package
      // extraction is tolerated and the install carries on.
      // NOTE(review): the first failure also skips the remaining packages —
      // confirm this best-effort behavior is intended.
    }
  } finally {
    // Always drop the scratch directory, including when `tar` fails.
    await rm(extractDir, { recursive: true, force: true });
  }
  await rm(downloadPath, { force: true });
  return await createSymlink(join2(CACHE_DIR, pkg.binPath));
}
|
|
10008
|
-
/**
 * Installs LibreOffice on Windows by running `msiexec /a` on the downloaded
 * MSI with CACHE_DIR as TARGETDIR, then deletes the download.
 *
 * @param {{ binPath: string }} pkg - Package descriptor; `binPath` is joined onto CACHE_DIR.
 * @param {string} downloadPath - Path of the downloaded MSI file.
 * @returns {Promise<string>} Path of the binary inside CACHE_DIR (no symlink is created here).
 * @throws {Error} When `msiexec` exits with a non-zero code or cannot be spawned.
 */
async function installWindows(pkg, downloadPath) {
  await new Promise((resolve2, reject) => {
    const child = spawn("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`]);
    // Without an 'error' listener a spawn failure is thrown as an unhandled
    // event instead of rejecting this promise.
    child.on("error", reject);
    child.on("close", (code) => code === 0 ? resolve2() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
  });
  await rm(downloadPath, { force: true });
  return join2(CACHE_DIR, pkg.binPath);
}
|
|
10016
|
-
/**
 * Exposes an installed binary as `<CACHE_DIR>/bin/soffice` and makes that
 * directory the first entry of this process's PATH.
 *
 * @param {string} actualBin - Path of the real soffice binary to link to.
 * @returns {Promise<string>} Path of the `soffice` link inside the bin directory.
 */
async function createSymlink(actualBin) {
  const binDir = join2(CACHE_DIR, "bin");
  await mkdir(binDir, { recursive: true });
  const linkBin = join2(binDir, "soffice");
  try {
    await symlink(actualBin, linkBin);
  } catch {
    // Kept from the original: link creation is best effort (for example the
    // link may already exist), so failures are ignored.
  }
  const currentPath = process.env.PATH;
  if (!currentPath) {
    // An unset PATH must not be interpolated as the literal text "undefined".
    process.env.PATH = binDir;
  } else if (currentPath.split(delimiter)[0] !== binDir) {
    // Prepend only when binDir is not already first, so repeated calls do not
    // keep growing PATH.
    process.env.PATH = `${binDir}${delimiter}${currentPath}`;
  }
  return linkBin;
}
|
|
10027
|
-
/**
 * Looks up the package descriptor for the current platform in PACKAGES and
 * hands it to installForPlatform().
 *
 * @param {Function} onProgress - Progress callback forwarded unchanged to installForPlatform().
 * @returns {Promise<*>} Whatever installForPlatform() resolves to.
 * @throws {ConvertError} `UNSUPPORTED_PLATFORM` when PACKAGES has no entry for `process.platform`.
 */
async function installLibreOffice(onProgress) {
  const { platform } = process;
  const pkg = PACKAGES[platform];
  if (pkg) {
    return await installForPlatform(pkg, onProgress);
  }
  throw new ConvertError(
    "UNSUPPORTED_PLATFORM",
    `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
  );
}
|
|
10038
|
-
/**
 * Locates a usable soffice binary, installing LibreOffice when none is found.
 *
 * Lookup order: findInPath(), findInCache(), findInDefaultPaths(). The first
 * hit is reported with a `soffice_found` validate event and returned. When
 * nothing is found and `autoInstall` is false, an error event is emitted and a
 * ConvertError is thrown. Otherwise an install is started; concurrent callers
 * share the single in-flight install promise.
 *
 * @param {ConvertEventEmitter} emitter - Receives validate/install/error events.
 * @param {boolean} [autoInstall=true] - Whether to install when nothing is found.
 * @returns {Promise<string>} Path of the soffice binary.
 * @throws {ConvertError} `SOFFICE_NOT_FOUND` when not found and `autoInstall` is false;
 *   install errors are re-thrown after an `install_failed` event.
 */
async function resolveSoffice(emitter, autoInstall = true) {
  emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
  // Each lookup runs only if the previous one found nothing.
  const lookups = [
    [findInPath, "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC"],
    [findInCache, "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC"],
    [findInDefaultPaths, "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC"]
  ];
  for (const [find, message] of lookups) {
    const sofficePath = await find();
    if (sofficePath) {
      emitter.validate("soffice_found", message, { sofficePath });
      return sofficePath;
    }
  }
  if (!autoInstall) {
    emitter.error(
      "validate",
      "SOFFICE_NOT_FOUND",
      "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
      "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
    );
    throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
  }
  if (installInFlight) {
    // Another caller already started an install; wait for that one.
    return installInFlight;
  }
  emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
  installInFlight = (async () => {
    try {
      const installed = await installLibreOffice((downloaded, total) => {
        // Guard against a zero/unknown total (NaN or Infinity) and clamp
        // overshoot so the reported value stays within 0-100.
        const percent = total > 0 ? Math.min(100, Math.round(downloaded / total * 100)) : 0;
        emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
          percent,
          downloadedBytes: downloaded,
          totalBytes: total
        });
      });
      emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
      return installed;
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);
      emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
      throw err;
    } finally {
      // Allow a later call to start a fresh install attempt.
      installInFlight = null;
    }
  })();
  return installInFlight;
}
|
|
10090
|
-
|
|
10091
|
-
// src/convert/libreoffice.ts
|
|
10092
|
-
// Callback-style converter entry points taken from the `libre` module object.
var libreConvert = libre.convert;
var libreConvertWithOptions = libre.convertWithOptions;
/**
 * Converts a document buffer to `targetExt` through the callback-based
 * converters above, wrapped in a promise with a timeout.
 *
 * @param {Buffer} buffer - Source document bytes.
 * @param {string} targetExt - Target extension passed to the converter (e.g. ".pdf").
 * @param {number} [timeoutMs=60000] - Milliseconds before the promise rejects with TIMEOUT.
 * @param {string} [sofficePath] - When given, used as the only soffice binary path.
 * @param {string} [sourceExt] - Extension appended to the temporary source file name;
 *   only used together with `sofficePath`.
 * @returns {Promise<Buffer>} The converted document.
 * @throws {ConvertError} `TIMEOUT` on timeout, `CONVERT_FAILED` when the converter
 *   reports an error or yields no output.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4, sofficePath, sourceExt) {
  return new Promise((resolve2, reject) => {
    // The timeout only rejects the promise; the converter call itself is not cancelled.
    const timer = setTimeout(() => {
      reject(
        new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
      );
    }, timeoutMs);
    const cb = (err, done) => {
      clearTimeout(timer);
      if (err || !done) {
        reject(
          new ConvertError(
            "CONVERT_FAILED",
            err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
          )
        );
        return;
      }
      resolve2(done);
    };
    try {
      if (sofficePath) {
        const fileName = sourceExt ? `source${sourceExt}` : "source";
        libreConvertWithOptions(buffer, targetExt, void 0, { sofficeBinaryPaths: [sofficePath], fileName }, cb);
      } else {
        libreConvert(buffer, targetExt, void 0, cb);
      }
    } catch (err) {
      // A synchronous throw still rejects with the same error as before, but
      // the timer is cleared so it does not linger for up to timeoutMs.
      clearTimeout(timer);
      reject(err);
    }
  });
}
|
|
10122
|
-
|
|
10123
|
-
// src/convert/events.ts
|
|
10124
|
-
/**
 * Minimal single-listener event channel used during conversion.
 * Every helper builds a plain event object and passes it to emit().
 */
var ConvertEventEmitter = class {
  listener = null;
  /** Register the event listener, replacing any previous one. */
  setListener(listener) {
    this.listener = listener;
  }
  /** Deliver an event to the listener; errors thrown by the listener are ignored. */
  emit(event) {
    try {
      if (this.listener != null) {
        this.listener(event);
      }
    } catch {
    }
  }
  /** Helper: emit a `detect` event; `meta` keys are merged into the event. */
  detect(stage, message, meta) {
    const event = Object.assign({ type: "detect", stage, message }, meta);
    this.emit(event);
  }
  /** Helper: emit a `validate` event; `meta` keys are merged into the event. */
  validate(stage, message, meta) {
    const event = Object.assign({ type: "validate", stage, message }, meta);
    this.emit(event);
  }
  /** Helper: emit an `install` event; `meta` keys are merged into the event. */
  install(stage, message, meta) {
    const event = Object.assign({ type: "install", stage, message }, meta);
    this.emit(event);
  }
  /** Helper: emit a conversion progress event with the given percentage. */
  progress(percent, message) {
    const event = { type: "convert", stage: "convert_progress", message, percent };
    this.emit(event);
  }
  /** Helper: emit the conversion start event (percent 0). */
  convertStart(message) {
    const event = { type: "convert", stage: "convert_start", message, percent: 0 };
    this.emit(event);
  }
  /** Helper: emit the conversion done event (percent 100). */
  convertDone(message) {
    const event = { type: "convert", stage: "convert_done", message, percent: 100 };
    this.emit(event);
  }
  /** Helper: emit the final `complete` event carrying `result`. */
  complete(result) {
    const event = { type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result };
    this.emit(event);
  }
  /** Helper: emit an `error` event; it is always flagged as recoverable. */
  error(stage, code, message, suggestion) {
    const event = { type: "error", stage, code, message, recoverable: true, suggestion };
    this.emit(event);
  }
};
|
|
10170
|
-
|
|
10171
|
-
// src/convert/index.ts
|
|
10172
|
-
// Module-level mutex state: whether a conversion currently holds the lock, and
// the FIFO queue of waiters to wake when it is released.
var isConverting = false;
var queue = [];
/**
 * Builds the release function handed to a lock holder. Releasing clears the
 * flag and wakes the next waiter, if any. Calls after the first are ignored so
 * a double release cannot clear the flag while another holder owns the lock.
 */
function createConvertLockRelease() {
  let released = false;
  return () => {
    if (released) {
      return;
    }
    released = true;
    isConverting = false;
    const next = queue.shift();
    next?.();
  };
}
/**
 * Acquires the conversion lock, waiting in FIFO order while it is held.
 *
 * @returns {Promise<() => void>} Resolves with the release function once the
 *   caller owns the lock; the caller must invoke it when done.
 */
async function acquireConvertLock() {
  if (!isConverting) {
    isConverting = true;
    return createConvertLockRelease();
  }
  return new Promise((resolve2) => {
    queue.push(() => {
      isConverting = true;
      resolve2(createConvertLockRelease());
    });
  });
}
|
|
10194
|
-
/**
 * Converts an HWP/HWPX document to PDF through LibreOffice.
 *
 * Steps: read the input, enforce the 500MB size limit, detect the format,
 * resolve (or install) soffice, then convert while holding the conversion lock.
 * Failures are reported as `{ success: false, code, error, stage }` results
 * rather than thrown.
 *
 * @param {string|Buffer|ArrayBuffer|Uint8Array} input - File path, or the document bytes.
 * @param {object} [options]
 * @param {Function} [options.onEvent] - Receives every emitted event.
 * @param {Function} [options.onProgress] - Legacy callback `(percent, message)` invoked
 *   for `convert_progress` events. May be combined with `onEvent`.
 * @param {boolean} [options.autoInstallLibreOffice=true] - Install LibreOffice when missing.
 * @param {number} [options.timeoutMs] - Conversion timeout forwarded to convertBuffer().
 * @returns {Promise<object>} `{ success: true, pdf, sourceFormat }` or a failure result.
 */
async function convertToPdf(input, options) {
  const emitter = new ConvertEventEmitter();
  const onEvent = options?.onEvent;
  const onProgress = options?.onProgress;
  if (onEvent || onProgress) {
    // The emitter holds a single listener, so both callbacks are served from
    // one wrapper; registering them separately would drop `onEvent`.
    emitter.setListener((event) => {
      try {
        onEvent?.(event);
      } finally {
        // Runs even if onEvent throws; emit() swallows the error afterwards.
        if (onProgress && event.type === "convert" && event.stage === "convert_progress") {
          onProgress(event.percent, event.message);
        }
      }
    });
  }
  try {
    emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
    let buffer;
    try {
      if (typeof input === "string") {
        buffer = await readFile(input);
      } else if (Buffer.isBuffer(input)) {
        buffer = input;
      } else {
        buffer = Buffer.from(input);
      }
    } catch (err) {
      const readError = `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`;
      emitter.error("detect", "PARSE_ERROR", readError);
      return {
        success: false,
        code: "PARSE_ERROR",
        error: readError,
        stage: "detect"
      };
    }
    const MAX_FILE_SIZE = 500 * 1024 * 1024;
    if (buffer.length > MAX_FILE_SIZE) {
      const sizeError = `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`;
      emitter.error("detect", "FILE_TOO_LARGE", sizeError);
      return {
        success: false,
        code: "FILE_TOO_LARGE",
        error: sizeError,
        stage: "detect"
      };
    }
    const format = detectFormat(toArrayBuffer(buffer));
    emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
    if (format !== "hwp" && format !== "hwpx") {
      const formatError = `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`;
      emitter.error("detect", "UNSUPPORTED_FORMAT", formatError);
      return {
        success: false,
        code: "UNSUPPORTED_FORMAT",
        error: formatError,
        stage: "detect"
      };
    }
    emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
    let sofficePath;
    try {
      sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
    } catch (err) {
      if (err instanceof ConvertError) {
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "validate"
        };
      }
      // Anything else is reported by the outer catch below.
      throw err;
    }
    const releaseLock = await acquireConvertLock();
    try {
      emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
      emitter.progress(10, "\uBCC0\uD658 \uC911...");
      const sourceExt = format === "hwpx" ? ".hwpx" : ".hwp";
      const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs, sofficePath, sourceExt);
      emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
      emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
      const result = {
        success: true,
        pdf: new Uint8Array(pdf),
        sourceFormat: format
      };
      emitter.complete({
        sourceFormat: format,
        pdfSize: pdf.length
      });
      return result;
    } catch (err) {
      if (err instanceof ConvertError) {
        emitter.error("convert", err.code, err.message);
        return {
          success: false,
          code: err.code,
          error: err.message,
          stage: "convert"
        };
      }
      const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
      const errorCode = classifyError(err);
      emitter.error("convert", errorCode, errorMsg);
      return {
        success: false,
        code: errorCode,
        error: errorMsg,
        stage: "convert"
      };
    } finally {
      releaseLock();
    }
  } catch (unexpectedErr) {
    const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
    emitter.error("convert", "PARSE_ERROR", errorMsg);
    return {
      success: false,
      code: "PARSE_ERROR",
      error: errorMsg,
      stage: "convert"
    };
  }
}
|
|
10321
|
-
|
|
10322
10145
|
// src/pipeline/unified-ocr.ts
|
|
10323
10146
|
import { performance } from "perf_hooks";
|
|
10324
10147
|
var OCR_PROMPT = [
|
|
@@ -10360,7 +10183,7 @@ async function parse2(input, options) {
|
|
|
10360
10183
|
let buffer;
|
|
10361
10184
|
if (typeof input === "string") {
|
|
10362
10185
|
try {
|
|
10363
|
-
const buf = await
|
|
10186
|
+
const buf = await readFile(input);
|
|
10364
10187
|
buffer = toArrayBuffer(buf);
|
|
10365
10188
|
} catch (err) {
|
|
10366
10189
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
@@ -10695,7 +10518,6 @@ export {
|
|
|
10695
10518
|
extractFormFields,
|
|
10696
10519
|
markdownToHwpx,
|
|
10697
10520
|
markdownToXlsx,
|
|
10698
|
-
convertToPdf,
|
|
10699
10521
|
parse2 as parse
|
|
10700
10522
|
};
|
|
10701
10523
|
/*! Bundled license information:
|
|
@@ -10703,4 +10525,4 @@ export {
|
|
|
10703
10525
|
cfb/cfb.js:
|
|
10704
10526
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
10705
10527
|
*/
|
|
10706
|
-
//# sourceMappingURL=chunk-
|
|
10528
|
+
//# sourceMappingURL=chunk-URSQEMVJ.js.map
|