pdfjs-reader-core 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +115 -116
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +14 -2
- package/dist/index.d.ts +14 -2
- package/dist/index.js +115 -116
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1331,37 +1331,54 @@ async function extractPageText(document2, pageNumber) {
|
|
|
1331
1331
|
const textContent = await page.getTextContent();
|
|
1332
1332
|
const viewport = page.getViewport({ scale: 1 });
|
|
1333
1333
|
let fullText = "";
|
|
1334
|
-
const
|
|
1334
|
+
const textItems = [];
|
|
1335
1335
|
for (const item of textContent.items) {
|
|
1336
1336
|
if ("str" in item && item.str) {
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
for (let i = 0; i < item.str.length; i++) {
|
|
1344
|
-
charPositions.push({
|
|
1345
|
-
char: item.str[i],
|
|
1346
|
-
rect: {
|
|
1347
|
-
x: x + i * charWidth,
|
|
1348
|
-
y: y - height,
|
|
1349
|
-
width: charWidth,
|
|
1350
|
-
height
|
|
1351
|
-
}
|
|
1352
|
-
});
|
|
1353
|
-
}
|
|
1337
|
+
textItems.push({
|
|
1338
|
+
text: item.str,
|
|
1339
|
+
transform: item.transform,
|
|
1340
|
+
width: item.width ?? 0,
|
|
1341
|
+
height: item.height ?? 12
|
|
1342
|
+
});
|
|
1354
1343
|
fullText += item.str;
|
|
1355
1344
|
}
|
|
1356
1345
|
}
|
|
1357
|
-
return { fullText,
|
|
1346
|
+
return { fullText, textItems, viewport };
|
|
1347
|
+
}
|
|
1348
|
+
function calculateMatchRects(textItems, startOffset, length, viewport) {
|
|
1349
|
+
const rects = [];
|
|
1350
|
+
let currentOffset = 0;
|
|
1351
|
+
for (const item of textItems) {
|
|
1352
|
+
const itemStart = currentOffset;
|
|
1353
|
+
const itemEnd = currentOffset + item.text.length;
|
|
1354
|
+
if (itemEnd > startOffset && itemStart < startOffset + length) {
|
|
1355
|
+
const [, , c, d, tx, ty] = item.transform;
|
|
1356
|
+
const x = tx;
|
|
1357
|
+
const y = viewport.height - ty;
|
|
1358
|
+
const height = Math.sqrt(c * c + d * d);
|
|
1359
|
+
const matchStartInItem = Math.max(0, startOffset - itemStart);
|
|
1360
|
+
const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
|
|
1361
|
+
const charWidth = item.text.length > 0 ? item.width / item.text.length : item.width;
|
|
1362
|
+
const matchWidth = charWidth * (matchEndInItem - matchStartInItem);
|
|
1363
|
+
const matchX = x + charWidth * matchStartInItem;
|
|
1364
|
+
const yOffset = height * 0.15;
|
|
1365
|
+
rects.push({
|
|
1366
|
+
x: matchX,
|
|
1367
|
+
y: y - height + yOffset,
|
|
1368
|
+
width: matchWidth,
|
|
1369
|
+
height
|
|
1370
|
+
});
|
|
1371
|
+
}
|
|
1372
|
+
currentOffset = itemEnd;
|
|
1373
|
+
}
|
|
1374
|
+
return rects;
|
|
1358
1375
|
}
|
|
1359
1376
|
async function findTextOnPage(document2, pageNumber, query, options = {}) {
|
|
1360
1377
|
const { caseSensitive = false, wholeWord = false } = options;
|
|
1361
1378
|
if (!query || pageNumber < 1 || pageNumber > document2.numPages) {
|
|
1362
1379
|
return [];
|
|
1363
1380
|
}
|
|
1364
|
-
const { fullText,
|
|
1381
|
+
const { fullText, textItems, viewport } = await extractPageText(document2, pageNumber);
|
|
1365
1382
|
const matches = [];
|
|
1366
1383
|
const searchText = caseSensitive ? query : query.toLowerCase();
|
|
1367
1384
|
const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
|
|
@@ -1377,17 +1394,15 @@ async function findTextOnPage(document2, pageNumber, query, options = {}) {
|
|
|
1377
1394
|
continue;
|
|
1378
1395
|
}
|
|
1379
1396
|
}
|
|
1380
|
-
const matchRects =
|
|
1381
|
-
|
|
1382
|
-
|
|
1397
|
+
const matchRects = calculateMatchRects(textItems, matchIndex, query.length, viewport);
|
|
1398
|
+
if (matchRects.length > 0) {
|
|
1399
|
+
matches.push({
|
|
1400
|
+
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
1401
|
+
rects: matchRects,
|
|
1402
|
+
pageNumber,
|
|
1403
|
+
startIndex: matchIndex
|
|
1404
|
+
});
|
|
1383
1405
|
}
|
|
1384
|
-
const mergedRects = mergeAdjacentRects(matchRects);
|
|
1385
|
-
matches.push({
|
|
1386
|
-
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
1387
|
-
rects: mergedRects,
|
|
1388
|
-
pageNumber,
|
|
1389
|
-
startIndex: matchIndex
|
|
1390
|
-
});
|
|
1391
1406
|
startIndex = matchIndex + 1;
|
|
1392
1407
|
}
|
|
1393
1408
|
return matches;
|
|
@@ -1640,7 +1655,7 @@ function createSearchStore(initialOverrides = {}) {
|
|
|
1640
1655
|
}
|
|
1641
1656
|
}
|
|
1642
1657
|
const matchText = pageText.substring(startIndex, startIndex + query.length);
|
|
1643
|
-
const rects =
|
|
1658
|
+
const rects = calculateMatchRects2(textItems, startIndex, query.length, viewport);
|
|
1644
1659
|
results.push({
|
|
1645
1660
|
pageNumber: pageNum,
|
|
1646
1661
|
matchIndex: matchIndex++,
|
|
@@ -1701,7 +1716,7 @@ function createSearchStore(initialOverrides = {}) {
|
|
|
1701
1716
|
}
|
|
1702
1717
|
}));
|
|
1703
1718
|
}
|
|
1704
|
-
function
|
|
1719
|
+
function calculateMatchRects2(textItems, startOffset, length, viewport) {
|
|
1705
1720
|
const rects = [];
|
|
1706
1721
|
let currentOffset = 0;
|
|
1707
1722
|
for (const item of textItems) {
|
|
@@ -1716,9 +1731,10 @@ function calculateMatchRects(textItems, startOffset, length, viewport) {
|
|
|
1716
1731
|
const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
|
|
1717
1732
|
const matchWidth = item.width / item.text.length * (matchEndInItem - matchStartInItem);
|
|
1718
1733
|
const matchX = x + item.width / item.text.length * matchStartInItem;
|
|
1734
|
+
const yOffset = height * 0.15;
|
|
1719
1735
|
rects.push({
|
|
1720
1736
|
x: matchX,
|
|
1721
|
-
y: y - height,
|
|
1737
|
+
y: y - height + yOffset,
|
|
1722
1738
|
width: matchWidth,
|
|
1723
1739
|
height
|
|
1724
1740
|
});
|
|
@@ -9295,24 +9311,33 @@ function getSrcIdentifier(src) {
|
|
|
9295
9311
|
const last = Array.from(data.slice(-4)).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
9296
9312
|
return `binary:${len}:${first}:${last}`;
|
|
9297
9313
|
}
|
|
9298
|
-
function
|
|
9299
|
-
|
|
9300
|
-
|
|
9301
|
-
const
|
|
9302
|
-
|
|
9303
|
-
|
|
9304
|
-
|
|
9305
|
-
|
|
9306
|
-
const
|
|
9307
|
-
|
|
9308
|
-
|
|
9309
|
-
|
|
9310
|
-
|
|
9311
|
-
|
|
9314
|
+
function calculateMatchRects3(textItems, startOffset, length, viewport) {
|
|
9315
|
+
const rects = [];
|
|
9316
|
+
let currentOffset = 0;
|
|
9317
|
+
for (const item of textItems) {
|
|
9318
|
+
const itemStart = currentOffset;
|
|
9319
|
+
const itemEnd = currentOffset + item.text.length;
|
|
9320
|
+
if (itemEnd > startOffset && itemStart < startOffset + length) {
|
|
9321
|
+
const [, , c, d, tx, ty] = item.transform;
|
|
9322
|
+
const x = tx;
|
|
9323
|
+
const y = viewport.height - ty;
|
|
9324
|
+
const height = Math.sqrt(c * c + d * d);
|
|
9325
|
+
const matchStartInItem = Math.max(0, startOffset - itemStart);
|
|
9326
|
+
const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
|
|
9327
|
+
const charWidth = item.text.length > 0 ? item.width / item.text.length : item.width;
|
|
9328
|
+
const matchWidth = charWidth * (matchEndInItem - matchStartInItem);
|
|
9329
|
+
const matchX = x + charWidth * matchStartInItem;
|
|
9330
|
+
const yOffset = height * 0.15;
|
|
9331
|
+
rects.push({
|
|
9332
|
+
x: matchX,
|
|
9333
|
+
y: y - height + yOffset,
|
|
9334
|
+
width: matchWidth,
|
|
9335
|
+
height
|
|
9336
|
+
});
|
|
9312
9337
|
}
|
|
9338
|
+
currentOffset = itemEnd;
|
|
9313
9339
|
}
|
|
9314
|
-
|
|
9315
|
-
return merged;
|
|
9340
|
+
return rects;
|
|
9316
9341
|
}
|
|
9317
9342
|
var import_react40, import_jsx_runtime26, PDFViewerInner, PDFViewerInnerWithRef, PDFViewerClient;
|
|
9318
9343
|
var init_PDFViewerClient = __esm({
|
|
@@ -9411,26 +9436,15 @@ var init_PDFViewerClient = __esm({
|
|
|
9411
9436
|
const textContent = await page.getTextContent();
|
|
9412
9437
|
const viewport = page.getViewport({ scale: 1 });
|
|
9413
9438
|
let fullText = "";
|
|
9414
|
-
const
|
|
9439
|
+
const textItems = [];
|
|
9415
9440
|
for (const item of textContent.items) {
|
|
9416
9441
|
if ("str" in item && item.str) {
|
|
9417
|
-
|
|
9418
|
-
|
|
9419
|
-
|
|
9420
|
-
|
|
9421
|
-
|
|
9422
|
-
|
|
9423
|
-
for (let i = 0; i < item.str.length; i++) {
|
|
9424
|
-
charPositions.push({
|
|
9425
|
-
char: item.str[i],
|
|
9426
|
-
rect: {
|
|
9427
|
-
x: x + i * charWidth,
|
|
9428
|
-
y: y - height,
|
|
9429
|
-
width: charWidth,
|
|
9430
|
-
height
|
|
9431
|
-
}
|
|
9432
|
-
});
|
|
9433
|
-
}
|
|
9442
|
+
textItems.push({
|
|
9443
|
+
text: item.str,
|
|
9444
|
+
transform: item.transform,
|
|
9445
|
+
width: item.width ?? 0,
|
|
9446
|
+
height: item.height ?? 12
|
|
9447
|
+
});
|
|
9434
9448
|
fullText += item.str;
|
|
9435
9449
|
}
|
|
9436
9450
|
}
|
|
@@ -9439,18 +9453,16 @@ var init_PDFViewerClient = __esm({
|
|
|
9439
9453
|
while (true) {
|
|
9440
9454
|
const matchIndex = textToSearch.indexOf(searchText, startIndex);
|
|
9441
9455
|
if (matchIndex === -1) break;
|
|
9442
|
-
const matchRects =
|
|
9443
|
-
|
|
9444
|
-
|
|
9456
|
+
const matchRects = calculateMatchRects3(textItems, matchIndex, text.length, viewport);
|
|
9457
|
+
if (matchRects.length > 0) {
|
|
9458
|
+
const highlight = annotationStore.getState().addHighlight({
|
|
9459
|
+
pageNumber: pageNum,
|
|
9460
|
+
rects: matchRects,
|
|
9461
|
+
color,
|
|
9462
|
+
text: fullText.substring(matchIndex, matchIndex + text.length)
|
|
9463
|
+
});
|
|
9464
|
+
highlightIds.push(highlight.id);
|
|
9445
9465
|
}
|
|
9446
|
-
const mergedRects = mergeRects2(matchRects);
|
|
9447
|
-
const highlight = annotationStore.getState().addHighlight({
|
|
9448
|
-
pageNumber: pageNum,
|
|
9449
|
-
rects: mergedRects,
|
|
9450
|
-
color,
|
|
9451
|
-
text: fullText.substring(matchIndex, matchIndex + text.length)
|
|
9452
|
-
});
|
|
9453
|
-
highlightIds.push(highlight.id);
|
|
9454
9466
|
startIndex = matchIndex + 1;
|
|
9455
9467
|
}
|
|
9456
9468
|
} catch {
|
|
@@ -9628,33 +9640,22 @@ var init_PDFViewerClient = __esm({
|
|
|
9628
9640
|
const textContent = await page.getTextContent();
|
|
9629
9641
|
const viewport = page.getViewport({ scale: 1 });
|
|
9630
9642
|
let fullText = "";
|
|
9631
|
-
const
|
|
9643
|
+
const textItems = [];
|
|
9632
9644
|
for (const item of textContent.items) {
|
|
9633
9645
|
if ("str" in item && item.str) {
|
|
9634
|
-
|
|
9635
|
-
|
|
9636
|
-
|
|
9637
|
-
|
|
9638
|
-
|
|
9639
|
-
|
|
9640
|
-
for (let i = 0; i < item.str.length; i++) {
|
|
9641
|
-
charPositions.push({
|
|
9642
|
-
char: item.str[i],
|
|
9643
|
-
rect: {
|
|
9644
|
-
x: x + i * charWidth,
|
|
9645
|
-
y: y - height,
|
|
9646
|
-
width: charWidth,
|
|
9647
|
-
height
|
|
9648
|
-
}
|
|
9649
|
-
});
|
|
9650
|
-
}
|
|
9646
|
+
textItems.push({
|
|
9647
|
+
text: item.str,
|
|
9648
|
+
transform: item.transform,
|
|
9649
|
+
width: item.width ?? 0,
|
|
9650
|
+
height: item.height ?? 12
|
|
9651
|
+
});
|
|
9651
9652
|
fullText += item.str;
|
|
9652
9653
|
}
|
|
9653
9654
|
}
|
|
9654
9655
|
const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
|
|
9655
9656
|
let startIndex = 0;
|
|
9656
9657
|
while (true) {
|
|
9657
|
-
|
|
9658
|
+
const matchIndex = textToSearch.indexOf(searchText, startIndex);
|
|
9658
9659
|
if (matchIndex === -1) break;
|
|
9659
9660
|
if (wholeWord) {
|
|
9660
9661
|
const beforeChar = matchIndex > 0 ? textToSearch[matchIndex - 1] : " ";
|
|
@@ -9664,26 +9665,24 @@ var init_PDFViewerClient = __esm({
|
|
|
9664
9665
|
continue;
|
|
9665
9666
|
}
|
|
9666
9667
|
}
|
|
9667
|
-
const matchRects =
|
|
9668
|
-
|
|
9669
|
-
|
|
9668
|
+
const matchRects = calculateMatchRects3(textItems, matchIndex, query.length, viewport);
|
|
9669
|
+
if (matchRects.length > 0) {
|
|
9670
|
+
const highlight = annotationStore.getState().addHighlight({
|
|
9671
|
+
pageNumber: pageNum,
|
|
9672
|
+
rects: matchRects,
|
|
9673
|
+
color,
|
|
9674
|
+
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
9675
|
+
source: "search"
|
|
9676
|
+
});
|
|
9677
|
+
result.matchCount++;
|
|
9678
|
+
result.highlightIds.push(highlight.id);
|
|
9679
|
+
result.matches.push({
|
|
9680
|
+
pageNumber: pageNum,
|
|
9681
|
+
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
9682
|
+
highlightId: highlight.id,
|
|
9683
|
+
rects: matchRects
|
|
9684
|
+
});
|
|
9670
9685
|
}
|
|
9671
|
-
const mergedRects = mergeRects2(matchRects);
|
|
9672
|
-
const highlight = annotationStore.getState().addHighlight({
|
|
9673
|
-
pageNumber: pageNum,
|
|
9674
|
-
rects: mergedRects,
|
|
9675
|
-
color,
|
|
9676
|
-
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
9677
|
-
source: "search"
|
|
9678
|
-
});
|
|
9679
|
-
result.matchCount++;
|
|
9680
|
-
result.highlightIds.push(highlight.id);
|
|
9681
|
-
result.matches.push({
|
|
9682
|
-
pageNumber: pageNum,
|
|
9683
|
-
text: fullText.substring(matchIndex, matchIndex + query.length),
|
|
9684
|
-
highlightId: highlight.id,
|
|
9685
|
-
rects: mergedRects
|
|
9686
|
-
});
|
|
9687
9686
|
startIndex = matchIndex + 1;
|
|
9688
9687
|
}
|
|
9689
9688
|
} catch {
|