kordoc 2.9.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -8
- package/dist/{chunk-QB7CS534.cjs → chunk-FWAXCTSX.cjs} +2 -2
- package/dist/{chunk-QB7CS534.cjs.map → chunk-FWAXCTSX.cjs.map} +1 -1
- package/dist/{chunk-M24KMDAR.js → chunk-GQQNAYZA.js} +3 -3
- package/dist/chunk-GQQNAYZA.js.map +1 -0
- package/dist/{chunk-SJ5TPMBT.js → chunk-ODF24QXC.js} +2 -2
- package/dist/{chunk-RXZLTACX.js → chunk-Z6TLTWYK.js} +2 -2
- package/dist/cli.js +3 -3
- package/dist/index.cjs +97 -97
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-OMPBVEFU.js → parser-BKYM3LKN.js} +2 -2
- package/dist/{parser-EL5YETUA.cjs → parser-BTIPAEDZ.cjs} +15 -15
- package/dist/{parser-EL5YETUA.cjs.map → parser-BTIPAEDZ.cjs.map} +1 -1
- package/dist/{parser-XBYGROQB.js → parser-FJNQEW7K.js} +2 -2
- package/dist/{watch-ULLLK7ID.js → watch-SBLSWHL7.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-M24KMDAR.js.map +0 -1
- /package/dist/{chunk-SJ5TPMBT.js.map → chunk-ODF24QXC.js.map} +0 -0
- /package/dist/{chunk-RXZLTACX.js.map → chunk-Z6TLTWYK.js.map} +0 -0
- /package/dist/{parser-OMPBVEFU.js.map → parser-BKYM3LKN.js.map} +0 -0
- /package/dist/{parser-XBYGROQB.js.map → parser-FJNQEW7K.js.map} +0 -0
- /package/dist/{watch-ULLLK7ID.js.map → watch-SBLSWHL7.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
fillHwpx,
|
|
9
9
|
markdownToHwpx,
|
|
10
10
|
parse
|
|
11
|
-
} from "./chunk-M24KMDAR.js";
|
|
11
|
+
} from "./chunk-GQQNAYZA.js";
|
|
12
12
|
import {
|
|
13
13
|
detectFormat,
|
|
14
14
|
detectZipFormat
|
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
blocksToMarkdown,
|
|
20
20
|
sanitizeError,
|
|
21
21
|
toArrayBuffer
|
|
22
|
-
} from "./chunk-SJ5TPMBT.js";
|
|
22
|
+
} from "./chunk-ODF24QXC.js";
|
|
23
23
|
import "./chunk-MOL7MDBG.js";
|
|
24
24
|
|
|
25
25
|
// src/mcp.ts
|
|
@@ -191,7 +191,7 @@ server.tool(
|
|
|
191
191
|
break;
|
|
192
192
|
case "pdf":
|
|
193
193
|
try {
|
|
194
|
-
const { extractPdfMetadataOnly } = await import("./parser-XBYGROQB.js");
|
|
194
|
+
const { extractPdfMetadataOnly } = await import("./parser-FJNQEW7K.js");
|
|
195
195
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
196
196
|
} catch {
|
|
197
197
|
metadata = void 0;
|
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
blocksToMarkdown,
|
|
7
7
|
safeMax,
|
|
8
8
|
safeMin
|
|
9
|
-
} from "./chunk-RXZLTACX.js";
|
|
9
|
+
} from "./chunk-Z6TLTWYK.js";
|
|
10
10
|
import {
|
|
11
11
|
parsePageRange
|
|
12
12
|
} from "./chunk-SBVRCJFH.js";
|
|
@@ -2551,4 +2551,4 @@ export {
|
|
|
2551
2551
|
extractPdfMetadataOnly,
|
|
2552
2552
|
parsePdfDocument
|
|
2553
2553
|
};
|
|
2554
|
-
//# sourceMappingURL=parser-OMPBVEFU.js.map
|
|
2554
|
+
//# sourceMappingURL=parser-BKYM3LKN.js.map
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
var _chunkQB7CS534cjs = require('./chunk-QB7CS534.cjs');
|
|
9
|
+
var _chunkFWAXCTSXcjs = require('./chunk-FWAXCTSX.cjs');
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -1293,7 +1293,7 @@ async function loadPdfWithTimeout(buffer) {
|
|
|
1293
1293
|
new Promise((_, reject) => {
|
|
1294
1294
|
timer = setTimeout(() => {
|
|
1295
1295
|
loadingTask.destroy();
|
|
1296
|
-
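reject(new (0, _chunkQB7CS534cjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));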
|
|
1296
|
+
reject(new (0, _chunkFWAXCTSXcjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));
|
|
1297
1297
|
}, PDF_LOAD_TIMEOUT_MS);
|
|
1298
1298
|
})
|
|
1299
1299
|
]);
|
|
@@ -1306,7 +1306,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1306
1306
|
const doc = await loadPdfWithTimeout(buffer);
|
|
1307
1307
|
try {
|
|
1308
1308
|
const pageCount = doc.numPages;
|
|
1309
|
-
if (pageCount === 0) throw new (0, _chunkQB7CS534cjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1309
|
+
if (pageCount === 0) throw new (0, _chunkFWAXCTSXcjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1310
1310
|
const metadata = { pageCount };
|
|
1311
1311
|
await extractPdfMetadata(doc, metadata);
|
|
1312
1312
|
const blocks = [];
|
|
@@ -1347,11 +1347,11 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1347
1347
|
pageText += pageText ? "\n" + t : t;
|
|
1348
1348
|
}
|
|
1349
1349
|
pageQuality.push(computePageQuality(i, pageText));
|
|
1350
|
-
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunkQB7CS534cjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1350
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunkFWAXCTSXcjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1351
1351
|
parsedPages++;
|
|
1352
1352
|
_optionalChain([options, 'optionalAccess', _12 => _12.onProgress, 'optionalCall', _13 => _13(parsedPages, totalTarget)]);
|
|
1353
1353
|
} catch (pageErr) {
|
|
1354
|
-
if (pageErr instanceof _chunkQB7CS534cjs.KordocError) throw pageErr;
|
|
1354
|
+
if (pageErr instanceof _chunkFWAXCTSXcjs.KordocError) throw pageErr;
|
|
1355
1355
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1356
1356
|
}
|
|
1357
1357
|
}
|
|
@@ -1368,7 +1368,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1368
1368
|
} catch (e2) {
|
|
1369
1369
|
}
|
|
1370
1370
|
}
|
|
1371
|
-
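throw Object.assign(new (0, _chunkQB7CS534cjs.KordocError)(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });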
|
|
1371
|
+
throw Object.assign(new (0, _chunkFWAXCTSXcjs.KordocError)(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
1372
1372
|
}
|
|
1373
1373
|
if (_optionalChain([options, 'optionalAccess', _15 => _15.removeHeaderFooter]) !== false && parsedPageCount >= 3) {
|
|
1374
1374
|
const removed = removeHeaderFooterBlocks(blocks, pageHeights, warnings);
|
|
@@ -1393,7 +1393,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1393
1393
|
detectMarkerHeadings(blocks);
|
|
1394
1394
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1395
1395
|
sanitizeBlockControlChars(blocks);
|
|
1396
|
-
let markdown = cleanPdfText(_chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks));
|
|
1396
|
+
let markdown = cleanPdfText(_chunkFWAXCTSXcjs.blocksToMarkdown.call(void 0, blocks));
|
|
1397
1397
|
return {
|
|
1398
1398
|
markdown,
|
|
1399
1399
|
blocks,
|
|
@@ -1480,9 +1480,9 @@ function detectHeadings(blocks, medianFontSize) {
|
|
|
1480
1480
|
if (/^\d+$/.test(text)) continue;
|
|
1481
1481
|
const ratio = block.style.fontSize / medianFontSize;
|
|
1482
1482
|
let level = 0;
|
|
1483
|
-
if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H1) level = 1;
|
|
1484
|
-
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H2) level = 2;
|
|
1485
|
-
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H3) level = 3;
|
|
1483
|
+
if (ratio >= _chunkFWAXCTSXcjs.HEADING_RATIO_H1) level = 1;
|
|
1484
|
+
else if (ratio >= _chunkFWAXCTSXcjs.HEADING_RATIO_H2) level = 2;
|
|
1485
|
+
else if (ratio >= _chunkFWAXCTSXcjs.HEADING_RATIO_H3) level = 3;
|
|
1486
1486
|
if (level > 0) {
|
|
1487
1487
|
block.type = "heading";
|
|
1488
1488
|
block.level = level;
|
|
@@ -1730,7 +1730,7 @@ function extractBlocksWithGrids(items, pageNum, grids, horizontals, verticals) {
|
|
|
1730
1730
|
}
|
|
1731
1731
|
if (remaining.length > 0) {
|
|
1732
1732
|
const allY = remaining.map((i) => i.y);
|
|
1733
|
-
const pageH = _chunkQB7CS534cjs.safeMax.call(void 0, allY) - _chunkQB7CS534cjs.safeMin.call(void 0, allY);
|
|
1733
|
+
const pageH = _chunkFWAXCTSXcjs.safeMax.call(void 0, allY) - _chunkFWAXCTSXcjs.safeMin.call(void 0, allY);
|
|
1734
1734
|
const groups = xyCutOrder(remaining, Math.max(15, pageH * 0.03));
|
|
1735
1735
|
const textBlocks = [];
|
|
1736
1736
|
for (const group of groups) {
|
|
@@ -1818,7 +1818,7 @@ function extractPageBlocksFallback(items, pageNum) {
|
|
|
1818
1818
|
blocks.push({ type: "paragraph", text: tableText, pageNumber: pageNum, bbox, style: dominantStyle(items) });
|
|
1819
1819
|
} else {
|
|
1820
1820
|
const allY = items.map((i) => i.y);
|
|
1821
|
-
const pageHeight = _chunkQB7CS534cjs.safeMax.call(void 0, allY) - _chunkQB7CS534cjs.safeMin.call(void 0, allY);
|
|
1821
|
+
const pageHeight = _chunkFWAXCTSXcjs.safeMax.call(void 0, allY) - _chunkFWAXCTSXcjs.safeMin.call(void 0, allY);
|
|
1822
1822
|
const gapThreshold = Math.max(15, pageHeight * 0.03);
|
|
1823
1823
|
const orderedGroups = xyCutOrder(items, gapThreshold);
|
|
1824
1824
|
for (const group of orderedGroups) {
|
|
@@ -1965,14 +1965,14 @@ function isProseSpread(items) {
|
|
|
1965
1965
|
for (let i = 1; i < sorted.length; i++) {
|
|
1966
1966
|
gaps.push(sorted[i].x - (sorted[i - 1].x + sorted[i - 1].w));
|
|
1967
1967
|
}
|
|
1968
|
-
const maxGap = _chunkQB7CS534cjs.safeMax.call(void 0, gaps);
|
|
1968
|
+
const maxGap = _chunkFWAXCTSXcjs.safeMax.call(void 0, gaps);
|
|
1969
1969
|
const avgLen = items.reduce((s, i) => s + i.text.length, 0) / items.length;
|
|
1970
1970
|
return maxGap < 40 && avgLen < 5;
|
|
1971
1971
|
}
|
|
1972
1972
|
function detectColumns(yLines) {
|
|
1973
1973
|
const allItems = yLines.flat();
|
|
1974
1974
|
if (allItems.length === 0) return null;
|
|
1975
|
-
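const pageWidth = _chunkQB7CS534cjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunkQB7CS534cjs.safeMin.call(void 0, allItems.map((i) => i.x));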
|
|
1975
|
+
const pageWidth = _chunkFWAXCTSXcjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunkFWAXCTSXcjs.safeMin.call(void 0, allItems.map((i) => i.x));
|
|
1976
1976
|
if (pageWidth < 100) return null;
|
|
1977
1977
|
let bigoLineIdx = -1;
|
|
1978
1978
|
for (let i = 0; i < yLines.length; i++) {
|
|
@@ -2551,4 +2551,4 @@ function formatMb(bytes) {
|
|
|
2551
2551
|
|
|
2552
2552
|
|
|
2553
2553
|
exports.cleanPdfText = cleanPdfText; exports.extractPdfMetadataOnly = extractPdfMetadataOnly; exports.parsePdfDocument = parsePdfDocument;
|
|
2554
|
-
//# sourceMappingURL=parser-EL5YETUA.cjs.map
|
|
2554
|
+
//# sourceMappingURL=parser-BTIPAEDZ.cjs.map
|