kordoc 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/dist/{-K5SLEFZD.js → -5BWAV4ZY.js} +5 -3
- package/dist/{chunk-3WRJQQIO.cjs → chunk-NBJB6TJB.cjs} +2 -2
- package/dist/{chunk-3WRJQQIO.cjs.map → chunk-NBJB6TJB.cjs.map} +1 -1
- package/dist/{chunk-SA2PERJ5.js → chunk-O5P6EG5L.js} +2 -2
- package/dist/{chunk-NHXKJWR7.js → chunk-X3SCCO5Q.js} +2 -2
- package/dist/{chunk-326STEDU.js → chunk-X7VQVMXQ.js} +845 -12
- package/dist/chunk-X7VQVMXQ.js.map +1 -0
- package/dist/cli.js +9 -7
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +964 -131
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +27 -1
- package/dist/index.d.ts +27 -1
- package/dist/index.js +844 -11
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-AU2NLC44.js → parser-3N6FZSKU.js} +2 -2
- package/dist/{parser-5KHU732L.cjs → parser-5FZJVLQL.cjs} +14 -14
- package/dist/{parser-5KHU732L.cjs.map → parser-5FZJVLQL.cjs.map} +1 -1
- package/dist/{parser-4IVYHKSL.js → parser-LZH7ZELV.js} +2 -2
- package/dist/{watch-5DDN4BUI.js → watch-4FMRS7QU.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-326STEDU.js.map +0 -1
- /package/dist/{-K5SLEFZD.js.map → -5BWAV4ZY.js.map} +0 -0
- /package/dist/{chunk-SA2PERJ5.js.map → chunk-O5P6EG5L.js.map} +0 -0
- /package/dist/{chunk-NHXKJWR7.js.map → chunk-X3SCCO5Q.js.map} +0 -0
- /package/dist/{parser-AU2NLC44.js.map → parser-3N6FZSKU.js.map} +0 -0
- /package/dist/{parser-4IVYHKSL.js.map → parser-LZH7ZELV.js.map} +0 -0
- /package/dist/{watch-5DDN4BUI.js.map → watch-4FMRS7QU.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
fillHwpx,
|
|
9
9
|
markdownToHwpx,
|
|
10
10
|
parse
|
|
11
|
-
} from "./chunk-326STEDU.js";
|
|
11
|
+
} from "./chunk-X7VQVMXQ.js";
|
|
12
12
|
import {
|
|
13
13
|
detectFormat,
|
|
14
14
|
detectZipFormat
|
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
blocksToMarkdown,
|
|
20
20
|
sanitizeError,
|
|
21
21
|
toArrayBuffer
|
|
22
|
-
} from "./chunk-SA2PERJ5.js";
|
|
22
|
+
} from "./chunk-O5P6EG5L.js";
|
|
23
23
|
import "./chunk-MOL7MDBG.js";
|
|
24
24
|
|
|
25
25
|
// src/mcp.ts
|
|
@@ -191,7 +191,7 @@ server.tool(
|
|
|
191
191
|
break;
|
|
192
192
|
case "pdf":
|
|
193
193
|
try {
|
|
194
|
-
const { extractPdfMetadataOnly } = await import("./parser-AU2NLC44.js");
|
|
194
|
+
const { extractPdfMetadataOnly } = await import("./parser-3N6FZSKU.js");
|
|
195
195
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
196
196
|
} catch {
|
|
197
197
|
metadata = void 0;
|
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
blocksToMarkdown,
|
|
8
8
|
safeMax,
|
|
9
9
|
safeMin
|
|
10
|
-
} from "./chunk-SA2PERJ5.js";
|
|
10
|
+
} from "./chunk-O5P6EG5L.js";
|
|
11
11
|
import {
|
|
12
12
|
parsePageRange
|
|
13
13
|
} from "./chunk-MOL7MDBG.js";
|
|
@@ -3144,4 +3144,4 @@ export {
|
|
|
3144
3144
|
parsePdfDocument,
|
|
3145
3145
|
removeHeaderFooterBlocks
|
|
3146
3146
|
};
|
|
3147
|
-
//# sourceMappingURL=parser-AU2NLC44.js.map
|
|
3147
|
+
//# sourceMappingURL=parser-3N6FZSKU.js.map
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
var _chunk3WRJQQIOcjs = require('./chunk-3WRJQQIO.cjs');
|
|
9
|
+
var _chunkNBJB6TJBcjs = require('./chunk-NBJB6TJB.cjs');
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -1495,7 +1495,7 @@ async function loadPdfWithTimeout(buffer) {
|
|
|
1495
1495
|
new Promise((_, reject) => {
|
|
1496
1496
|
timer = setTimeout(() => {
|
|
1497
1497
|
loadingTask.destroy();
|
|
1498
|
-
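reject(new (0, _chunk3WRJQQIOcjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));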
|
|
1498
|
+
reject(new (0, _chunkNBJB6TJBcjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));
|
|
1499
1499
|
}, PDF_LOAD_TIMEOUT_MS);
|
|
1500
1500
|
})
|
|
1501
1501
|
]);
|
|
@@ -1508,7 +1508,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1508
1508
|
const doc = await loadPdfWithTimeout(buffer);
|
|
1509
1509
|
try {
|
|
1510
1510
|
const pageCount = doc.numPages;
|
|
1511
|
-
if (pageCount === 0) throw new (0, _chunk3WRJQQIOcjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1511
|
+
if (pageCount === 0) throw new (0, _chunkNBJB6TJBcjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1512
1512
|
const metadata = { pageCount };
|
|
1513
1513
|
await extractPdfMetadata(doc, metadata);
|
|
1514
1514
|
const blocks = [];
|
|
@@ -1568,11 +1568,11 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1568
1568
|
pageText += pageText ? "\n" + t : t;
|
|
1569
1569
|
}
|
|
1570
1570
|
pageQuality.push(computePageQuality(i, pageText));
|
|
1571
|
-
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunk3WRJQQIOcjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1571
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunkNBJB6TJBcjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1572
1572
|
parsedPages++;
|
|
1573
1573
|
_optionalChain([options, 'optionalAccess', _12 => _12.onProgress, 'optionalCall', _13 => _13(parsedPages, totalTarget)]);
|
|
1574
1574
|
} catch (pageErr) {
|
|
1575
|
-
if (pageErr instanceof _chunk3WRJQQIOcjs.KordocError) throw pageErr;
|
|
1575
|
+
if (pageErr instanceof _chunkNBJB6TJBcjs.KordocError) throw pageErr;
|
|
1576
1576
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1577
1577
|
}
|
|
1578
1578
|
}
|
|
@@ -1640,7 +1640,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1640
1640
|
detectKoreanListBlocks(blocks);
|
|
1641
1641
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1642
1642
|
sanitizeBlockControlChars(blocks);
|
|
1643
|
-
let markdown = cleanPdfText(_chunk3WRJQQIOcjs.blocksToMarkdown.call(void 0, blocks));
|
|
1643
|
+
let markdown = cleanPdfText(_chunkNBJB6TJBcjs.blocksToMarkdown.call(void 0, blocks));
|
|
1644
1644
|
return {
|
|
1645
1645
|
markdown,
|
|
1646
1646
|
blocks,
|
|
@@ -1728,9 +1728,9 @@ function detectHeadings(blocks, medianFontSize) {
|
|
|
1728
1728
|
if (/^\d+$/.test(text)) continue;
|
|
1729
1729
|
const ratio = block.style.fontSize / medianFontSize;
|
|
1730
1730
|
let level = 0;
|
|
1731
|
-
if (ratio >= _chunk3WRJQQIOcjs.HEADING_RATIO_H1) level = 1;
|
|
1732
|
-
else if (ratio >= _chunk3WRJQQIOcjs.HEADING_RATIO_H2) level = 2;
|
|
1733
|
-
else if (ratio >= _chunk3WRJQQIOcjs.HEADING_RATIO_H3) level = 3;
|
|
1731
|
+
if (ratio >= _chunkNBJB6TJBcjs.HEADING_RATIO_H1) level = 1;
|
|
1732
|
+
else if (ratio >= _chunkNBJB6TJBcjs.HEADING_RATIO_H2) level = 2;
|
|
1733
|
+
else if (ratio >= _chunkNBJB6TJBcjs.HEADING_RATIO_H3) level = 3;
|
|
1734
1734
|
if (level > 0) {
|
|
1735
1735
|
block.type = "heading";
|
|
1736
1736
|
block.level = level;
|
|
@@ -2135,7 +2135,7 @@ function extractBlocksWithGrids(items, pageNum, grids, horizontals, verticals) {
|
|
|
2135
2135
|
}
|
|
2136
2136
|
if (remaining.length > 0) {
|
|
2137
2137
|
const allY = remaining.map((i) => i.y);
|
|
2138
|
-
const pageH = _chunk3WRJQQIOcjs.safeMax.call(void 0, allY) - _chunk3WRJQQIOcjs.safeMin.call(void 0, allY);
|
|
2138
|
+
const pageH = _chunkNBJB6TJBcjs.safeMax.call(void 0, allY) - _chunkNBJB6TJBcjs.safeMin.call(void 0, allY);
|
|
2139
2139
|
const groups = xyCutOrder(remaining, Math.max(15, pageH * 0.03));
|
|
2140
2140
|
const textBlocks = [];
|
|
2141
2141
|
for (const group of groups) {
|
|
@@ -2264,7 +2264,7 @@ function extractPageBlocksFallback(items, pageNum) {
|
|
|
2264
2264
|
blocks.push({ type: "paragraph", text: tableText, pageNumber: pageNum, bbox, style: dominantStyle(items) });
|
|
2265
2265
|
} else {
|
|
2266
2266
|
const allY = items.map((i) => i.y);
|
|
2267
|
-
const pageHeight = _chunk3WRJQQIOcjs.safeMax.call(void 0, allY) - _chunk3WRJQQIOcjs.safeMin.call(void 0, allY);
|
|
2267
|
+
const pageHeight = _chunkNBJB6TJBcjs.safeMax.call(void 0, allY) - _chunkNBJB6TJBcjs.safeMin.call(void 0, allY);
|
|
2268
2268
|
const gapThreshold = Math.max(15, pageHeight * 0.03);
|
|
2269
2269
|
const orderedGroups = xyCutOrder(items, gapThreshold);
|
|
2270
2270
|
for (const group of orderedGroups) {
|
|
@@ -2440,14 +2440,14 @@ function isProseSpread(items) {
|
|
|
2440
2440
|
for (let i = 1; i < sorted.length; i++) {
|
|
2441
2441
|
gaps.push(sorted[i].x - (sorted[i - 1].x + sorted[i - 1].w));
|
|
2442
2442
|
}
|
|
2443
|
-
const maxGap = _chunk3WRJQQIOcjs.safeMax.call(void 0, gaps);
|
|
2443
|
+
const maxGap = _chunkNBJB6TJBcjs.safeMax.call(void 0, gaps);
|
|
2444
2444
|
const avgLen = items.reduce((s, i) => s + i.text.length, 0) / items.length;
|
|
2445
2445
|
return maxGap < 40 && avgLen < 5;
|
|
2446
2446
|
}
|
|
2447
2447
|
function detectColumns(yLines) {
|
|
2448
2448
|
const allItems = yLines.flat();
|
|
2449
2449
|
if (allItems.length === 0) return null;
|
|
2450
|
-
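const pageWidth = _chunk3WRJQQIOcjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunk3WRJQQIOcjs.safeMin.call(void 0, allItems.map((i) => i.x));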
|
|
2450
|
+
const pageWidth = _chunkNBJB6TJBcjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunkNBJB6TJBcjs.safeMin.call(void 0, allItems.map((i) => i.x));
|
|
2451
2451
|
if (pageWidth < 100) return null;
|
|
2452
2452
|
let bigoLineIdx = -1;
|
|
2453
2453
|
for (let i = 0; i < yLines.length; i++) {
|
|
@@ -3143,4 +3143,4 @@ function formatMb(bytes) {
|
|
|
3143
3143
|
|
|
3144
3144
|
|
|
3145
3145
|
exports.cleanPdfText = cleanPdfText; exports.detectKoreanListBlocks = detectKoreanListBlocks; exports.detectTableCaptions = detectTableCaptions; exports.extractPdfMetadataOnly = extractPdfMetadataOnly; exports.mergeCrossPageTables = mergeCrossPageTables; exports.parsePdfDocument = parsePdfDocument; exports.removeHeaderFooterBlocks = removeHeaderFooterBlocks;
|
|
3146
|
-
//# sourceMappingURL=parser-5KHU732L.cjs.map
|
|
3146
|
+
//# sourceMappingURL=parser-5FZJVLQL.cjs.map
|