kordoc 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/{chunk-XJYM2AUA.js → chunk-GJ2S6IMC.js} +457 -35
- package/dist/chunk-GJ2S6IMC.js.map +1 -0
- package/dist/{chunk-EVWOJ4T5.js → chunk-PKIJLEV6.js} +2 -2
- package/dist/cli.js +4 -4
- package/dist/index.cjs +456 -33
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +456 -33
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{provider-A4FHJSID.js → provider-7H4CPZYS.js} +2 -1
- package/dist/provider-7H4CPZYS.js.map +1 -0
- package/dist/{utils-6JEIFBCJ.js → utils-BWQ2RGUD.js} +2 -2
- package/dist/{watch-BCPDLGOE.js → watch-X7IC7MLF.js} +9 -5
- package/dist/watch-X7IC7MLF.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-XJYM2AUA.js.map +0 -1
- package/dist/provider-A4FHJSID.js.map +0 -1
- package/dist/watch-BCPDLGOE.js.map +0 -1
- /package/dist/{chunk-EVWOJ4T5.js.map → chunk-PKIJLEV6.js.map} +0 -0
- /package/dist/{utils-6JEIFBCJ.js.map → utils-BWQ2RGUD.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -85,6 +85,7 @@ async function ocrPages(doc, provider, pageFilter, effectivePageCount) {
|
|
|
85
85
|
blocks.push({ type: "paragraph", text: text.trim(), pageNumber: i });
|
|
86
86
|
}
|
|
87
87
|
} catch {
|
|
88
|
+
blocks.push({ type: "paragraph", text: `[OCR \uC2E4\uD328: \uD398\uC774\uC9C0 ${i}]` });
|
|
88
89
|
}
|
|
89
90
|
}
|
|
90
91
|
return blocks;
|
|
@@ -182,7 +183,7 @@ var import_zlib = require("zlib");
|
|
|
182
183
|
var import_xmldom = require("@xmldom/xmldom");
|
|
183
184
|
|
|
184
185
|
// src/utils.ts
|
|
185
|
-
var VERSION = true ? "2.0
|
|
186
|
+
var VERSION = true ? "2.1.0" : "0.0.0-dev";
|
|
186
187
|
function toArrayBuffer(buf) {
|
|
187
188
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
188
189
|
return buf.buffer;
|
|
@@ -371,6 +372,47 @@ function sanitizeText(text) {
|
|
|
371
372
|
}
|
|
372
373
|
return result;
|
|
373
374
|
}
|
|
375
|
+
/**
 * Replace "layout" tables (short tables whose cells hold running text) with
 * one paragraph block per non-empty text line.
 *
 * A table block is flattened only when it has at most 3 rows, is not 1x1, and
 * either contains more than 5 newline characters in total, or has at most
 * 2 rows and more than 300 characters of cell text. Every other block is
 * passed through unchanged (same object reference).
 *
 * @param {Array<object>} blocks - Parsed blocks.
 * @returns {Array<object>} A new array; the input array is not modified.
 */
function flattenLayoutTables(blocks) {
  const output = [];
  for (const block of blocks) {
    const table = block.type === "table" ? block.table : null;
    if (!table) {
      output.push(block);
      continue;
    }
    const { rows: rowCount, cols: colCount, cells } = table;
    const isSingleCell = rowCount === 1 && colCount === 1;
    if (isSingleCell || !(rowCount <= 3)) {
      output.push(block);
      continue;
    }

    // Measure how much running text the table carries.
    let newlineCount = 0;
    let charCount = 0;
    for (let row = 0; row < rowCount; row++) {
      for (let col = 0; col < colCount; col++) {
        const raw = cells[row]?.[col]?.text || "";
        newlineCount += (raw.match(/\n/g) || []).length;
        charCount += raw.length;
      }
    }
    const looksLikeLayout = newlineCount > 5 || (rowCount <= 2 && charCount > 300);
    if (!looksLikeLayout) {
      output.push(block);
      continue;
    }

    // Emit cell contents in row-major order, one paragraph per non-blank line.
    for (let row = 0; row < rowCount; row++) {
      for (let col = 0; col < colCount; col++) {
        const body = cells[row]?.[col]?.text?.trim();
        if (!body) continue;
        for (const rawLine of body.split("\n")) {
          const text = rawLine.trim();
          if (text) {
            output.push({ type: "paragraph", text, pageNumber: block.pageNumber });
          }
        }
      }
    }
  }
  return output;
}
|
|
374
416
|
function blocksToMarkdown(blocks) {
|
|
375
417
|
const lines = [];
|
|
376
418
|
for (let i = 0; i < blocks.length; i++) {
|
|
@@ -471,6 +513,9 @@ function tableToMarkdown(table) {
|
|
|
471
513
|
if (dr === 0 && dc === 0) continue;
|
|
472
514
|
if (r + dr < numRows && c + dc < numCols) {
|
|
473
515
|
skip.add(`${r + dr},${c + dc}`);
|
|
516
|
+
if (dr === 0) {
|
|
517
|
+
display[r][c + dc] = cell.text.replace(/\n/g, "<br>");
|
|
518
|
+
}
|
|
474
519
|
}
|
|
475
520
|
}
|
|
476
521
|
}
|
|
@@ -566,7 +611,12 @@ function parseCharProperties(doc, map) {
|
|
|
566
611
|
if (!id) continue;
|
|
567
612
|
const prop = {};
|
|
568
613
|
const height = el.getAttribute("height");
|
|
569
|
-
if (height)
|
|
614
|
+
if (height) {
|
|
615
|
+
const parsedHeight = parseInt(height, 10);
|
|
616
|
+
if (!isNaN(parsedHeight) && parsedHeight > 0) {
|
|
617
|
+
prop.fontSize = parsedHeight / 100;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
570
620
|
const bold = el.getAttribute("bold");
|
|
571
621
|
if (bold === "true" || bold === "1") prop.bold = true;
|
|
572
622
|
const italic = el.getAttribute("italic");
|
|
@@ -706,7 +756,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
706
756
|
const data = await file.async("uint8array");
|
|
707
757
|
decompressed.total += data.length;
|
|
708
758
|
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new KordocError("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
709
|
-
const ext = ref.includes(".") ? ref.split(".").pop() : "png";
|
|
759
|
+
const ext = ref.includes(".") ? ref.split(".").pop() || "png" : "png";
|
|
710
760
|
const mimeType = imageExtToMime(ext);
|
|
711
761
|
imageIndex++;
|
|
712
762
|
const filename = `image_${String(imageIndex).padStart(3, "0")}.${mimeToExt(mimeType)}`;
|
|
@@ -1000,8 +1050,10 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
1000
1050
|
break;
|
|
1001
1051
|
case "cellSpan":
|
|
1002
1052
|
if (tableCtx?.cell) {
|
|
1003
|
-
const
|
|
1004
|
-
const
|
|
1053
|
+
const rawCs = parseInt(el.getAttribute("colSpan") || "1", 10);
|
|
1054
|
+
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
1055
|
+
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
1056
|
+
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
1005
1057
|
tableCtx.cell.colSpan = clampSpan(cs, MAX_COLS);
|
|
1006
1058
|
tableCtx.cell.rowSpan = clampSpan(rs, MAX_ROWS);
|
|
1007
1059
|
}
|
|
@@ -1093,6 +1145,8 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1093
1145
|
extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
|
|
1094
1146
|
} else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
|
|
1095
1147
|
walkChildren(el, d + 1);
|
|
1148
|
+
} else if (localTag === "run") {
|
|
1149
|
+
tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
|
|
1096
1150
|
}
|
|
1097
1151
|
}
|
|
1098
1152
|
};
|
|
@@ -1269,8 +1323,9 @@ var TAG_CHAR_SHAPE = 68;
|
|
|
1269
1323
|
var TAG_CTRL_HEADER = 71;
|
|
1270
1324
|
var TAG_LIST_HEADER = 72;
|
|
1271
1325
|
var TAG_TABLE = 77;
|
|
1272
|
-
var TAG_DOC_CHAR_SHAPE =
|
|
1273
|
-
var
|
|
1326
|
+
var TAG_DOC_CHAR_SHAPE = 21;
|
|
1327
|
+
var TAG_DOC_PARA_SHAPE = 25;
|
|
1328
|
+
var TAG_DOC_STYLE = 26;
|
|
1274
1329
|
var CHAR_LINE = 0;
|
|
1275
1330
|
var CHAR_SECTION_BREAK = 10;
|
|
1276
1331
|
var CHAR_PARA = 13;
|
|
@@ -1326,8 +1381,14 @@ function parseFileHeader(data) {
|
|
|
1326
1381
|
}
|
|
1327
1382
|
function parseDocInfo(records) {
|
|
1328
1383
|
const charShapes = [];
|
|
1384
|
+
const paraShapes = [];
|
|
1329
1385
|
const styles = [];
|
|
1330
1386
|
for (const rec of records) {
|
|
1387
|
+
if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
|
|
1388
|
+
const flags = rec.data.readUInt32LE(0);
|
|
1389
|
+
const outlineLevel = flags >> 25 & 7;
|
|
1390
|
+
paraShapes.push({ outlineLevel });
|
|
1391
|
+
}
|
|
1331
1392
|
if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
|
|
1332
1393
|
if (rec.data.length >= 50) {
|
|
1333
1394
|
const fontSize = rec.data.readUInt32LE(42);
|
|
@@ -1367,7 +1428,7 @@ function parseDocInfo(records) {
|
|
|
1367
1428
|
}
|
|
1368
1429
|
}
|
|
1369
1430
|
}
|
|
1370
|
-
return { charShapes, styles };
|
|
1431
|
+
return { charShapes, paraShapes, styles };
|
|
1371
1432
|
}
|
|
1372
1433
|
function extractText(data) {
|
|
1373
1434
|
let result = "";
|
|
@@ -2379,12 +2440,13 @@ function parseHwp5Document(buffer, options) {
|
|
|
2379
2440
|
}
|
|
2380
2441
|
}
|
|
2381
2442
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2443
|
+
const flatBlocks = flattenLayoutTables(blocks);
|
|
2382
2444
|
if (docInfo) {
|
|
2383
|
-
detectHwp5Headings(
|
|
2445
|
+
detectHwp5Headings(flatBlocks, docInfo);
|
|
2384
2446
|
}
|
|
2385
|
-
const outline =
|
|
2386
|
-
const markdown = blocksToMarkdown(
|
|
2387
|
-
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2447
|
+
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2448
|
+
const markdown = blocksToMarkdown(flatBlocks);
|
|
2449
|
+
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2388
2450
|
}
|
|
2389
2451
|
function parseDocInfoStream(cfb, compressed) {
|
|
2390
2452
|
try {
|
|
@@ -2435,16 +2497,21 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2435
2497
|
}
|
|
2436
2498
|
if (baseFontSize <= 0) return;
|
|
2437
2499
|
for (const block of blocks) {
|
|
2438
|
-
if (block.type
|
|
2500
|
+
if (block.type === "heading") continue;
|
|
2501
|
+
if (block.type !== "paragraph" || !block.text) continue;
|
|
2439
2502
|
const text = block.text.trim();
|
|
2440
2503
|
if (text.length === 0 || text.length > 200) continue;
|
|
2441
2504
|
if (/^\d+$/.test(text)) continue;
|
|
2442
|
-
const ratio = block.style.fontSize / baseFontSize;
|
|
2443
2505
|
let level = 0;
|
|
2444
|
-
if (
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2506
|
+
if (block.style?.fontSize && baseFontSize > 0) {
|
|
2507
|
+
const ratio = block.style.fontSize / baseFontSize;
|
|
2508
|
+
if (ratio >= HEADING_RATIO_H1) level = 1;
|
|
2509
|
+
else if (ratio >= HEADING_RATIO_H2) level = 2;
|
|
2510
|
+
else if (ratio >= HEADING_RATIO_H3) level = 3;
|
|
2511
|
+
}
|
|
2512
|
+
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2513
|
+
if (level === 0) level = 2;
|
|
2514
|
+
} else if (/^제\d+(조의?\d*)\s*[\((]/.test(text) && text.length <= 80) {
|
|
2448
2515
|
if (level === 0) level = 3;
|
|
2449
2516
|
}
|
|
2450
2517
|
if (level > 0) {
|
|
@@ -2676,13 +2743,20 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
2676
2743
|
while (i < records.length) {
|
|
2677
2744
|
const rec = records[i];
|
|
2678
2745
|
if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
|
|
2679
|
-
const { paragraph, tables, nextIdx, charShapeIds } = parseParagraphWithTables(records, i);
|
|
2746
|
+
const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
|
|
2680
2747
|
if (paragraph) {
|
|
2681
2748
|
const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
|
|
2682
2749
|
if (docInfo && charShapeIds.length > 0) {
|
|
2683
2750
|
const style = resolveCharStyle(charShapeIds, docInfo);
|
|
2684
2751
|
if (style) block.style = style;
|
|
2685
2752
|
}
|
|
2753
|
+
if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
|
|
2754
|
+
const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
|
|
2755
|
+
if (ol >= 1 && ol <= 6) {
|
|
2756
|
+
block.type = "heading";
|
|
2757
|
+
block.level = ol;
|
|
2758
|
+
}
|
|
2759
|
+
}
|
|
2686
2760
|
blocks.push(block);
|
|
2687
2761
|
}
|
|
2688
2762
|
for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
|
|
@@ -2802,6 +2876,8 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
2802
2876
|
let text = "";
|
|
2803
2877
|
const tables = [];
|
|
2804
2878
|
const charShapeIds = [];
|
|
2879
|
+
const paraHeaderData = records[startIdx].data;
|
|
2880
|
+
const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
|
|
2805
2881
|
let i = startIdx + 1;
|
|
2806
2882
|
while (i < records.length) {
|
|
2807
2883
|
const rec = records[i];
|
|
@@ -2826,7 +2902,7 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
2826
2902
|
i++;
|
|
2827
2903
|
}
|
|
2828
2904
|
const trimmed = text.trim();
|
|
2829
|
-
return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds };
|
|
2905
|
+
return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
|
|
2830
2906
|
}
|
|
2831
2907
|
function parseTableBlock(records, startIdx) {
|
|
2832
2908
|
const tableLevel = records[startIdx].level;
|
|
@@ -2939,10 +3015,33 @@ var MIN_LINE_LENGTH = 10;
|
|
|
2939
3015
|
var COORD_MERGE_TOL = 3;
|
|
2940
3016
|
var CONNECT_TOL = 5;
|
|
2941
3017
|
var CELL_PADDING = 2;
|
|
3018
|
+
var MAX_LINE_WIDTH = 5;
|
|
3019
|
+
var IDENTITY = [1, 0, 0, 1, 0, 0];
|
|
3020
|
+
/**
 * Combine two 6-element affine matrices laid out as [a, b, c, d, e, f].
 *
 * @param {number[]} m1 - First matrix.
 * @param {number[]} m2 - Second matrix.
 * @returns {number[]} A new 6-element matrix; neither input is modified.
 */
function matMultiply(m1, m2) {
  const [a1, b1, c1, d1, e1, f1] = m1;
  const [a2, b2, c2, d2, e2, f2] = m2;
  const a = a1 * a2 + c1 * b2;
  const b = b1 * a2 + d1 * b2;
  const c = a1 * c2 + c1 * d2;
  const d = b1 * c2 + d1 * d2;
  // The translation of m2 is carried through m1's linear part, then offset by m1's translation.
  const e = a1 * e2 + c1 * f2 + e1;
  const f = b1 * e2 + d1 * f2 + f1;
  return [a, b, c, d, e, f];
}
|
|
3030
|
+
/**
 * Apply a 6-element affine matrix [a, b, c, d, e, f] to the point (x, y).
 *
 * @param {number[]} m - Affine matrix.
 * @param {number} x - Input x coordinate.
 * @param {number} y - Input y coordinate.
 * @returns {[number, number]} The transformed [x, y] pair.
 */
function matTransformPoint(m, x, y) {
  const outX = m[0] * x + m[2] * y + m[4];
  const outY = m[1] * x + m[3] * y + m[5];
  return [outX, outY];
}
|
|
3033
|
+
/**
 * Return a single scale factor for a 6-element affine matrix: the larger of
 * sqrt(m[1]^2 + m[3]^2) and sqrt(m[0]^2 + m[2]^2).
 *
 * @param {number[]} m - Affine matrix [a, b, c, d, e, f]; e and f are ignored.
 * @returns {number} The larger of the two norms.
 */
function matScale(m) {
  const a = m[0];
  const b = m[1];
  const c = m[2];
  const d = m[3];
  const normBD = Math.sqrt(b * b + d * d);
  const normAC = Math.sqrt(a * a + c * c);
  return Math.max(normBD, normAC);
}
|
|
2942
3039
|
function extractLines(fnArray, argsArray) {
|
|
2943
3040
|
const horizontals = [];
|
|
2944
3041
|
const verticals = [];
|
|
3042
|
+
let ctm = [...IDENTITY];
|
|
2945
3043
|
let lineWidth = 1;
|
|
3044
|
+
const stateStack = [];
|
|
2946
3045
|
let currentPath = [];
|
|
2947
3046
|
let pathStartX = 0, pathStartY = 0;
|
|
2948
3047
|
let curX = 0, curY = 0;
|
|
@@ -2960,13 +3059,53 @@ function extractLines(fnArray, argsArray) {
|
|
|
2960
3059
|
);
|
|
2961
3060
|
}
|
|
2962
3061
|
}
|
|
2963
|
-
function
|
|
2964
|
-
if (
|
|
3062
|
+
function tryConvertLinesToRectangle(path) {
|
|
3063
|
+
if (path.length < 3 || path.length > 5) return false;
|
|
3064
|
+
const first = path[0], last = path[path.length - 1];
|
|
3065
|
+
const closed = Math.abs(first.x1 - last.x2) < 1 && Math.abs(first.y1 - last.y2) < 1;
|
|
3066
|
+
if (!closed) return false;
|
|
3067
|
+
let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;
|
|
3068
|
+
for (const seg of path) {
|
|
3069
|
+
minX = Math.min(minX, seg.x1, seg.x2);
|
|
3070
|
+
minY = Math.min(minY, seg.y1, seg.y2);
|
|
3071
|
+
maxX = Math.max(maxX, seg.x1, seg.x2);
|
|
3072
|
+
maxY = Math.max(maxY, seg.y1, seg.y2);
|
|
3073
|
+
}
|
|
3074
|
+
const w = maxX - minX, h = maxY - minY;
|
|
3075
|
+
if (w < MIN_LINE_LENGTH && h < MIN_LINE_LENGTH) return false;
|
|
3076
|
+
path.length = 0;
|
|
3077
|
+
if (h < ORIENTATION_TOL * 2 || w > MIN_LINE_LENGTH && h <= MAX_LINE_WIDTH) {
|
|
3078
|
+
path.push({ x1: minX, y1: (minY + maxY) / 2, x2: maxX, y2: (minY + maxY) / 2 });
|
|
3079
|
+
} else if (w < ORIENTATION_TOL * 2 || h > MIN_LINE_LENGTH && w <= MAX_LINE_WIDTH) {
|
|
3080
|
+
path.push({ x1: (minX + maxX) / 2, y1: minY, x2: (minX + maxX) / 2, y2: maxY });
|
|
3081
|
+
} else {
|
|
3082
|
+
pushRectangle(path, minX, minY, w, h);
|
|
3083
|
+
}
|
|
3084
|
+
return true;
|
|
3085
|
+
}
|
|
3086
|
+
function flushPath(isStroke, isFill) {
|
|
3087
|
+
if (!isStroke && !isFill) {
|
|
3088
|
+
currentPath = [];
|
|
3089
|
+
return;
|
|
3090
|
+
}
|
|
3091
|
+
if (isFill && !isStroke && currentPath.length >= 3) {
|
|
3092
|
+
tryConvertLinesToRectangle(currentPath);
|
|
3093
|
+
}
|
|
3094
|
+
const scale = matScale(ctm);
|
|
3095
|
+
const effectiveLW = lineWidth * scale;
|
|
3096
|
+
if (effectiveLW > MAX_LINE_WIDTH && isStroke && !isFill) {
|
|
2965
3097
|
currentPath = [];
|
|
2966
3098
|
return;
|
|
2967
3099
|
}
|
|
2968
3100
|
for (const seg of currentPath) {
|
|
2969
|
-
|
|
3101
|
+
const [px1, py1] = matTransformPoint(ctm, seg.x1, seg.y1);
|
|
3102
|
+
const [px2, py2] = matTransformPoint(ctm, seg.x2, seg.y2);
|
|
3103
|
+
classifyAndAdd(
|
|
3104
|
+
{ x1: px1, y1: py1, x2: px2, y2: py2 },
|
|
3105
|
+
effectiveLW,
|
|
3106
|
+
horizontals,
|
|
3107
|
+
verticals
|
|
3108
|
+
);
|
|
2970
3109
|
}
|
|
2971
3110
|
currentPath = [];
|
|
2972
3111
|
}
|
|
@@ -2974,9 +3113,28 @@ function extractLines(fnArray, argsArray) {
|
|
|
2974
3113
|
const op = fnArray[i];
|
|
2975
3114
|
const args = argsArray[i];
|
|
2976
3115
|
switch (op) {
|
|
3116
|
+
// ── Graphics State ──
|
|
3117
|
+
case import_pdf.OPS.save:
|
|
3118
|
+
stateStack.push({ ctm: [...ctm], lineWidth });
|
|
3119
|
+
break;
|
|
3120
|
+
case import_pdf.OPS.restore:
|
|
3121
|
+
if (stateStack.length > 0) {
|
|
3122
|
+
const state = stateStack.pop();
|
|
3123
|
+
ctm = state.ctm;
|
|
3124
|
+
lineWidth = state.lineWidth;
|
|
3125
|
+
}
|
|
3126
|
+
break;
|
|
3127
|
+
case import_pdf.OPS.transform: {
|
|
3128
|
+
const m = args;
|
|
3129
|
+
if (m.length >= 6) {
|
|
3130
|
+
ctm = matMultiply(ctm, [m[0], m[1], m[2], m[3], m[4], m[5]]);
|
|
3131
|
+
}
|
|
3132
|
+
break;
|
|
3133
|
+
}
|
|
2977
3134
|
case import_pdf.OPS.setLineWidth:
|
|
2978
3135
|
lineWidth = args[0] || 1;
|
|
2979
3136
|
break;
|
|
3137
|
+
// ── Path Construction ──
|
|
2980
3138
|
case import_pdf.OPS.constructPath: {
|
|
2981
3139
|
const arg0 = args[0];
|
|
2982
3140
|
if (Array.isArray(arg0)) {
|
|
@@ -3044,34 +3202,60 @@ function extractLines(fnArray, argsArray) {
|
|
|
3044
3202
|
}
|
|
3045
3203
|
}
|
|
3046
3204
|
}
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
|
|
3205
|
+
const isStroke5 = afterOp === import_pdf.OPS.stroke || afterOp === import_pdf.OPS.closeStroke;
|
|
3206
|
+
const isFill5 = afterOp === import_pdf.OPS.fill || afterOp === import_pdf.OPS.eoFill;
|
|
3207
|
+
const isBoth5 = afterOp === import_pdf.OPS.fillStroke || afterOp === import_pdf.OPS.eoFillStroke || afterOp === import_pdf.OPS.closeFillStroke || afterOp === import_pdf.OPS.closeEOFillStroke;
|
|
3208
|
+
if (isStroke5 || isFill5 || isBoth5) {
|
|
3209
|
+
flushPath(isStroke5 || isBoth5, isFill5 || isBoth5);
|
|
3051
3210
|
} else if (afterOp === import_pdf.OPS.endPath) {
|
|
3052
|
-
flushPath(false);
|
|
3211
|
+
flushPath(false, false);
|
|
3053
3212
|
}
|
|
3054
3213
|
}
|
|
3055
3214
|
break;
|
|
3056
3215
|
}
|
|
3216
|
+
// ── Paint Operations ──
|
|
3057
3217
|
case import_pdf.OPS.stroke:
|
|
3058
3218
|
case import_pdf.OPS.closeStroke:
|
|
3059
|
-
flushPath(true);
|
|
3219
|
+
flushPath(true, false);
|
|
3060
3220
|
break;
|
|
3061
3221
|
case import_pdf.OPS.fill:
|
|
3062
3222
|
case import_pdf.OPS.eoFill:
|
|
3223
|
+
flushPath(false, true);
|
|
3224
|
+
break;
|
|
3063
3225
|
case import_pdf.OPS.fillStroke:
|
|
3064
3226
|
case import_pdf.OPS.eoFillStroke:
|
|
3065
3227
|
case import_pdf.OPS.closeFillStroke:
|
|
3066
3228
|
case import_pdf.OPS.closeEOFillStroke:
|
|
3067
|
-
flushPath(true);
|
|
3229
|
+
flushPath(true, true);
|
|
3068
3230
|
break;
|
|
3069
3231
|
case import_pdf.OPS.endPath:
|
|
3070
|
-
flushPath(false);
|
|
3232
|
+
flushPath(false, false);
|
|
3071
3233
|
break;
|
|
3072
3234
|
}
|
|
3073
3235
|
}
|
|
3074
|
-
return {
|
|
3236
|
+
return {
|
|
3237
|
+
horizontals: deduplicateLines(horizontals),
|
|
3238
|
+
verticals: deduplicateLines(verticals)
|
|
3239
|
+
};
|
|
3240
|
+
}
|
|
3241
|
+
/**
 * Drop line segments that coincide with an earlier segment.
 *
 * Two segments count as duplicates when all four coordinates (x1, y1, x2, y2)
 * differ by at most COORD_MERGE_TOL. The first occurrence is kept, and its
 * lineWidth is raised in place to the largest lineWidth among its duplicates.
 *
 * @param {Array<{x1:number,y1:number,x2:number,y2:number,lineWidth:number}>} lines
 * @returns {Array<object>} The input array itself when it has 0 or 1 entries,
 *   otherwise a new array of the surviving segment objects.
 */
function deduplicateLines(lines) {
  if (lines.length <= 1) return lines;
  const tol = COORD_MERGE_TOL;
  const near = (p, q) => Math.abs(p - q) <= tol;
  const unique = [];
  for (const candidate of lines) {
    const kept = unique.find(
      (seen) =>
        near(candidate.y1, seen.y1) &&
        near(candidate.y2, seen.y2) &&
        near(candidate.x1, seen.x1) &&
        near(candidate.x2, seen.x2)
    );
    if (kept === undefined) {
      unique.push(candidate);
      continue;
    }
    // Keep the thicker stroke width on the surviving segment.
    if (candidate.lineWidth > kept.lineWidth) {
      kept.lineWidth = candidate.lineWidth;
    }
  }
  return unique;
}
|
|
3076
3260
|
function classifyAndAdd(seg, lineWidth, horizontals, verticals) {
|
|
3077
3261
|
const dx = Math.abs(seg.x2 - seg.x1);
|
|
@@ -3667,6 +3851,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
3667
3851
|
const medianFontSize = computeMedianFontSize(allFontSizes);
|
|
3668
3852
|
if (medianFontSize > 0) {
|
|
3669
3853
|
detectHeadings(blocks, medianFontSize);
|
|
3854
|
+
mergeAdjacentHeadings(blocks);
|
|
3670
3855
|
}
|
|
3671
3856
|
detectMarkerHeadings(blocks);
|
|
3672
3857
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
@@ -3741,6 +3926,46 @@ function detectHeadings(blocks, medianFontSize) {
|
|
|
3741
3926
|
}
|
|
3742
3927
|
}
|
|
3743
3928
|
}
|
|
3929
|
+
/**
 * Merge consecutive heading blocks that sit on the same visual line.
 *
 * Two neighbouring blocks are merged when both are headings with bbox and
 * text, share the same bbox.page and level, and their estimated baselines
 * (bbox.y plus font size, or bbox.height when no font size is set) differ by
 * less than 1.5x the larger font size (12 is assumed when missing). The texts
 * are joined with a single space, left-most block first, the bboxes are
 * unioned, and the second block is removed.
 *
 * @param {Array<object>} blocks - Block list; modified in place.
 * @returns {void}
 */
function mergeAdjacentHeadings(blocks) {
  const baselineOf = (b) => b.bbox.y + (b.style?.fontSize || b.bbox.height);
  const fontSizeOf = (b) => b.style?.fontSize || 12;

  const onSameLine = (a, b) => {
    if (a.type !== "heading" || b.type !== "heading") return false;
    if (!a.bbox || !b.bbox || !a.text || !b.text) return false;
    if (a.bbox.page !== b.bbox.page) return false;
    if (a.level !== b.level) return false;
    const limit = Math.max(fontSizeOf(a), fontSizeOf(b)) * 1.5;
    return Math.abs(baselineOf(a) - baselineOf(b)) < limit;
  };

  let idx = 0;
  while (idx + 1 < blocks.length) {
    const head = blocks[idx];
    const tail = blocks[idx + 1];
    if (!onSameLine(head, tail)) {
      idx += 1;
      continue;
    }
    const left = Math.min(head.bbox.x, tail.bbox.x);
    const right = Math.max(head.bbox.x + head.bbox.width, tail.bbox.x + tail.bbox.width);
    head.text = head.bbox.x <= tail.bbox.x
      ? head.text + " " + tail.text
      : tail.text + " " + head.text;
    head.bbox = {
      page: head.bbox.page,
      x: left,
      y: Math.min(head.bbox.y, tail.bbox.y),
      width: right - left,
      height: Math.max(head.bbox.height, tail.bbox.height)
    };
    // Stay on the same index so the merged heading is compared with its new neighbour.
    blocks.splice(idx + 1, 1);
  }
}
|
|
3744
3969
|
function collapseEvenSpacing(text) {
|
|
3745
3970
|
const tokens = text.split(" ");
|
|
3746
3971
|
const singleCharCount = tokens.filter((t) => t.length === 1).length;
|
|
@@ -3749,6 +3974,169 @@ function collapseEvenSpacing(text) {
|
|
|
3749
3974
|
}
|
|
3750
3975
|
return text;
|
|
3751
3976
|
}
|
|
3977
|
+
/**
 * Build paragraph blocks for a page by ordering its text items with
 * xyCutOrder and emitting one paragraph per Y-grouped line.
 *
 * The gap threshold passed to xyCutOrder is 3% of the vertical extent of the
 * items, with a floor of 15.
 *
 * @param {Array<object>} items - Text items; each must expose a numeric `y`.
 * @param {number} pageNum - Page number stamped on every produced block.
 * @returns {Array<object>|null} Paragraph blocks, or null when none were produced.
 */
function buildXyCutBlocks(items, pageNum) {
  // Fold min/max instead of Math.max(...ys): spreading a very large array into
  // call arguments can throw RangeError. The fold gives the same result,
  // including -Infinity extent for an empty list and NaN propagation.
  let minY = Infinity;
  let maxY = -Infinity;
  for (const item of items) {
    minY = Math.min(minY, item.y);
    maxY = Math.max(maxY, item.y);
  }
  const pageHeight = maxY - minY;
  const gapThreshold = Math.max(15, pageHeight * 0.03);

  const orderedGroups = xyCutOrder(items, gapThreshold);
  const blocks = [];
  for (const group of orderedGroups) {
    if (group.length === 0) continue;
    for (const line of groupByY(group)) {
      const text = mergeLineSimple(line);
      // Skip lines that contain only whitespace.
      if (!text.trim()) continue;
      blocks.push({
        type: "paragraph",
        text,
        pageNumber: pageNum,
        bbox: computeBBox(line, pageNum),
        style: dominantStyle(line)
      });
    }
  }
  return blocks.length > 0 ? blocks : null;
}
|
|
4000
|
+
/**
 * Re-interpret a grid-detected table that looks under-segmented (too few
 * cells for the amount of text it holds).
 *
 * A table is treated as under-segmented when it is 1x1, or when it has at
 * most 2 non-blank cells and either at least 8 text lines inside them or at
 * least 6 source text items. For such tables the function tries, in order:
 *   1. XY-cut paragraph blocks, when the items form a multi-column layout;
 *   2. a table rebuilt from the text layout;
 *   3. cluster-detected tables plus one paragraph per unused item.
 *
 * @param {object} table - Table with `rows`, `cols` and `cells[][]` ({ text }).
 * @param {Array<object>} items - Text items located inside the table area.
 * @param {number} pageNum - Page number stamped on produced blocks.
 * @param {object} bbox - Bounding box forwarded to buildTableFromTextLayout.
 * @returns {Array<object>|null} Replacement blocks, or null to keep the table.
 */
function normalizeUnderSegmentedTable(table, items, pageNum, bbox) {
  let filledCells = 0;
  let textLines = 0;
  for (const row of table.cells) {
    for (const cell of row) {
      if (!cell.text.trim()) continue;
      filledCells += 1;
      textLines += cell.text.split("\n").length;
    }
  }

  const isSingleCell = table.rows === 1 && table.cols === 1;
  const isSparse = filledCells <= 2;
  const isUnderSegmented = isSingleCell || (isSparse && (textLines >= 8 || items.length >= 6));
  if (!isUnderSegmented) return null;

  if (hasMultiColumnLayout(items)) return buildXyCutBlocks(items, pageNum);

  const layoutTable = buildTableFromTextLayout(items, pageNum, bbox);
  if (layoutTable) return layoutTable;

  const clusterItems = items.map(({ text, x, y, w, h, fontSize, fontName }) => ({
    text,
    x,
    y,
    w,
    h,
    fontSize,
    fontName
  }));
  const clusterResults = detectClusterTables(clusterItems, pageNum);
  if (!(clusterResults.length > 0)) return null;

  // Map each cluster item back to its position so used source items can be excluded below.
  const positionOf = /* @__PURE__ */ new Map(clusterItems.map((ci, idx) => [ci, idx]));
  const consumed = /* @__PURE__ */ new Set();
  const blocks = [];
  for (const result of clusterResults) {
    for (const used of result.usedItems) {
      const idx = positionOf.get(used);
      if (idx !== void 0) consumed.add(idx);
    }
    blocks.push({ type: "table", table: result.table, pageNumber: pageNum, bbox: result.bbox });
  }

  items.forEach((item, idx) => {
    if (consumed.has(idx)) return;
    if (!item.text.trim()) return;
    blocks.push({
      type: "paragraph",
      text: item.text,
      pageNumber: pageNum,
      bbox: computeBBox([item], pageNum),
      style: { fontSize: item.fontSize, fontName: item.fontName }
    });
  });

  // Order by descending (y + height); blocks without a bbox sort as 0.
  const upperEdge = (b) => (b.bbox ? b.bbox.y + b.bbox.height : 0);
  blocks.sort((a, b) => upperEdge(b) - upperEdge(a));
  return blocks.length > 0 ? blocks : null;
}
|
|
4050
|
+
/**
 * Try to reconstruct a table purely from the positions of text items.
 *
 * Steps:
 *   1. Sort items by descending y (then ascending x) and group them into rows
 *      using a y tolerance of 3 relative to the first item of each row.
 *   2. Inside each row, record the midpoint of every horizontal gap that is at
 *      least 1.5x the row's average font size.
 *   3. Cluster those midpoints (within 15 of the running cluster mean); a
 *      cluster seen at least twice becomes a column boundary.
 *   4. Assign each item to a column by its horizontal centre. A row whose first
 *      column is empty is merged into the previous row as a continuation.
 *
 * Returns null when there are fewer than 4 items, fewer than 2 rows, fewer
 * than 2 gaps, no repeated column boundary, fewer than 2 table rows, or when
 * less than 30% of the cells are filled.
 *
 * @param {Array<{text:string,x:number,y:number,w:number,fontSize:number}>} items
 * @param {number} pageNum - Page number stamped on the produced block.
 * @param {object} bbox - Bounding box attached to the produced block as-is.
 * @returns {Array<object>|null} A one-element array holding the table block, or null.
 */
function buildTableFromTextLayout(items, pageNum, bbox) {
  if (items.length < 4) return null;

  // 1. Group items into rows.
  const sorted = [...items].sort((a, b) => b.y - a.y || a.x - b.x);
  const yTol = 3;
  const rows = [];
  let curRow = [sorted[0]];
  let curY = sorted[0].y;
  for (let i = 1; i < sorted.length; i++) {
    if (Math.abs(sorted[i].y - curY) <= yTol) {
      curRow.push(sorted[i]);
    } else {
      rows.push(curRow);
      curRow = [sorted[i]];
      curY = sorted[i].y;
    }
  }
  rows.push(curRow);
  if (rows.length < 2) return null;

  // 2. Collect candidate column separators from wide intra-row gaps.
  const gapPositions = [];
  for (const row of rows) {
    if (row.length < 2) continue;
    const sortedX = [...row].sort((a, b) => a.x - b.x);
    const avgFs = sortedX.reduce((s, i) => s + i.fontSize, 0) / sortedX.length;
    for (let j = 1; j < sortedX.length; j++) {
      const prevRight = sortedX[j - 1].x + sortedX[j - 1].w;
      const gap = sortedX[j].x - prevRight;
      if (gap >= avgFs * 1.5) {
        gapPositions.push(prevRight + gap / 2);
      }
    }
  }
  if (gapPositions.length < 2) return null;

  // 3. Cluster separators; only positions observed at least twice become boundaries.
  gapPositions.sort((a, b) => a - b);
  const colBoundaries = [];
  let clusterSum = gapPositions[0];
  let clusterCount = 1;
  for (let i = 1; i < gapPositions.length; i++) {
    const avg = clusterSum / clusterCount;
    if (Math.abs(gapPositions[i] - avg) <= 15) {
      clusterSum += gapPositions[i];
      clusterCount++;
    } else {
      if (clusterCount >= 2) colBoundaries.push(clusterSum / clusterCount);
      clusterSum = gapPositions[i];
      clusterCount = 1;
    }
  }
  if (clusterCount >= 2) colBoundaries.push(clusterSum / clusterCount);
  if (colBoundaries.length === 0) return null;

  // 4. Distribute items into columns and fold continuation rows upward.
  const numCols = colBoundaries.length + 1;
  const tableRows = [];
  for (const row of rows) {
    const cells = Array(numCols).fill("");
    const sortedX = [...row].sort((a, b) => a.x - b.x);
    for (const item of sortedX) {
      const cx = item.x + item.w / 2;
      let col = 0;
      for (let b = 0; b < colBoundaries.length; b++) {
        if (cx > colBoundaries[b]) col = b + 1;
      }
      cells[col] = cells[col] ? cells[col] + " " + item.text : item.text;
    }
    if (cells[0].trim() === "" && tableRows.length > 0) {
      // Empty first column: treat as wrapped text belonging to the previous row.
      const prevCells = tableRows[tableRows.length - 1].cells;
      for (let c = 0; c < numCols; c++) {
        if (cells[c].trim()) {
          prevCells[c] = prevCells[c] ? prevCells[c] + " " + cells[c].trim() : cells[c].trim();
        }
      }
    } else {
      tableRows.push({ cells });
    }
  }
  if (tableRows.length < 2) return null;

  const nonEmptyCount = tableRows.reduce((sum, r) => sum + r.cells.filter((c) => c.trim()).length, 0);
  const totalCount = tableRows.length * numCols;
  if (nonEmptyCount < totalCount * 0.3) return null;

  // Strip a leading bullet from non-first columns. A hyphen is only treated as
  // a bullet when it is NOT followed by a digit or decimal point, so negative
  // numbers such as "-5" or "-.5" keep their sign.
  const leadingBullet = /^(?:[•○·]|-(?![\d.]))\s*/;
  const irCells = tableRows.map(
    (r) => r.cells.map((text, colIdx) => {
      let cleaned = text.trim();
      if (colIdx > 0) cleaned = cleaned.replace(leadingBullet, "");
      return { text: cleaned, colSpan: 1, rowSpan: 1 };
    })
  );
  const irTable = {
    rows: tableRows.length,
    cols: numCols,
    cells: irCells,
    hasHeader: tableRows.length > 1
  };
  return [{ type: "table", table: irTable, pageNumber: pageNum, bbox }];
}
|
|
3752
4140
|
function shouldDemoteTable(table) {
|
|
3753
4141
|
const allCells = table.cells.flatMap((row) => row.map((c) => c.text.trim())).filter(Boolean);
|
|
3754
4142
|
const allText = allCells.join(" ");
|
|
@@ -3795,6 +4183,32 @@ function detectMarkerHeadings(blocks) {
|
|
|
3795
4183
|
}
|
|
3796
4184
|
}
|
|
3797
4185
|
}
|
|
4186
|
+
/**
 * Heuristically decide whether text items are laid out in two columns.
 *
 * Returns true only when all of these hold:
 *   - there are at least 30 items spanning a width of at least 200;
 *   - the widest gap between x-consecutive items is at least 20;
 *   - the middle of that gap lies between 35% and 65% of the width;
 *   - each side of the split holds at least 15 items (by horizontal centre)
 *     and the smaller side has at least 35% as many items as the larger.
 *
 * @param {Array<{x:number,w:number}>} items - Positioned text items.
 * @returns {boolean}
 */
function hasMultiColumnLayout(items) {
  if (items.length < 30) return false;

  const byX = [...items].sort((a, b) => a.x - b.x);
  const leftEdge = byX[0].x;
  const rightEdge = byX.reduce(
    (edge, it) => (it.x + it.w > edge ? it.x + it.w : edge),
    leftEdge
  );
  const span = rightEdge - leftEdge;
  if (span < 200) return false;

  // Find the widest horizontal gap between neighbours in x order.
  let widestGap = 0;
  let splitX = 0;
  for (let k = 1; k < byX.length; k++) {
    const prevRight = byX[k - 1].x + byX[k - 1].w;
    const gap = byX[k].x - prevRight;
    if (gap > widestGap) {
      widestGap = gap;
      splitX = (prevRight + byX[k].x) / 2;
    }
  }
  if (widestGap < 20) return false;

  const relativeSplit = (splitX - leftEdge) / span;
  if (relativeSplit < 0.35 || relativeSplit > 0.65) return false;

  let onLeft = 0;
  let onRight = 0;
  for (const it of items) {
    const centre = it.x + it.w / 2;
    if (centre < splitX) onLeft += 1;
    if (centre >= splitX) onRight += 1;
  }
  if (onLeft < 15 || onRight < 15) return false;

  const balance = Math.min(onLeft, onRight) / Math.max(onLeft, onRight);
  return !(balance < 0.35);
}
|
|
3798
4212
|
var MAX_XYCUT_DEPTH = 50;
|
|
3799
4213
|
function xyCutOrder(items, gapThreshold, depth = 0) {
|
|
3800
4214
|
if (items.length === 0) return [];
|
|
@@ -3925,6 +4339,11 @@ function extractBlocksWithGrids(items, pageNum, grids, horizontals, verticals) {
|
|
|
3925
4339
|
width: grid.bbox.x2 - grid.bbox.x1,
|
|
3926
4340
|
height: grid.bbox.y2 - grid.bbox.y1
|
|
3927
4341
|
};
|
|
4342
|
+
const normalized = normalizeUnderSegmentedTable(irTable, tableItems, pageNum, tableBbox);
|
|
4343
|
+
if (normalized) {
|
|
4344
|
+
blocks.push(...normalized);
|
|
4345
|
+
continue;
|
|
4346
|
+
}
|
|
3928
4347
|
if (shouldDemoteTable(irTable)) {
|
|
3929
4348
|
const demoted = demoteTableToText(irTable);
|
|
3930
4349
|
if (demoted) {
|
|
@@ -3970,6 +4389,10 @@ function mergeAdjacentTableBlocks(blocks) {
|
|
|
3970
4389
|
}
|
|
3971
4390
|
function extractPageBlocksFallback(items, pageNum) {
|
|
3972
4391
|
if (items.length === 0) return [];
|
|
4392
|
+
if (hasMultiColumnLayout(items)) {
|
|
4393
|
+
const xyBlocks = buildXyCutBlocks(items, pageNum) || [];
|
|
4394
|
+
return detectSpecialKoreanTables(detectListBlocks(xyBlocks));
|
|
4395
|
+
}
|
|
3973
4396
|
const blocks = [];
|
|
3974
4397
|
const allYLines = groupByY(items);
|
|
3975
4398
|
const columns = detectColumns(allYLines);
|
|
@@ -3987,7 +4410,7 @@ function extractPageBlocksFallback(items, pageNum) {
|
|
|
3987
4410
|
fontSize: i.fontSize,
|
|
3988
4411
|
fontName: i.fontName
|
|
3989
4412
|
}));
|
|
3990
|
-
const clusterResults = detectClusterTables(clusterItems, pageNum);
|
|
4413
|
+
const clusterResults = hasMultiColumnLayout(items) ? [] : detectClusterTables(clusterItems, pageNum);
|
|
3991
4414
|
if (clusterResults.length > 0) {
|
|
3992
4415
|
const ciToIdx = /* @__PURE__ */ new Map();
|
|
3993
4416
|
for (let ci = 0; ci < clusterItems.length; ci++) ciToIdx.set(clusterItems[ci], ci);
|