pdfjs-reader-core 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1331,37 +1331,54 @@ async function extractPageText(document2, pageNumber) {
1331
1331
  const textContent = await page.getTextContent();
1332
1332
  const viewport = page.getViewport({ scale: 1 });
1333
1333
  let fullText = "";
1334
- const charPositions = [];
1334
+ const textItems = [];
1335
1335
  for (const item of textContent.items) {
1336
1336
  if ("str" in item && item.str) {
1337
- const tx = item.transform;
1338
- const x = tx[4];
1339
- const y = viewport.height - tx[5];
1340
- const width = item.width ?? 0;
1341
- const height = item.height ?? 12;
1342
- const charWidth = item.str.length > 0 ? width / item.str.length : width;
1343
- for (let i = 0; i < item.str.length; i++) {
1344
- charPositions.push({
1345
- char: item.str[i],
1346
- rect: {
1347
- x: x + i * charWidth,
1348
- y: y - height,
1349
- width: charWidth,
1350
- height
1351
- }
1352
- });
1353
- }
1337
+ textItems.push({
1338
+ text: item.str,
1339
+ transform: item.transform,
1340
+ width: item.width ?? 0,
1341
+ height: item.height ?? 12
1342
+ });
1354
1343
  fullText += item.str;
1355
1344
  }
1356
1345
  }
1357
- return { fullText, charPositions };
1346
+ return { fullText, textItems, viewport };
1347
+ }
1348
/**
 * Builds one highlight rectangle for every text item that overlaps a match.
 *
 * Offsets are measured in characters of the concatenation of all
 * `item.text` values, in array order. Horizontal position and width inside
 * an item are estimated by dividing `item.width` evenly across its characters.
 *
 * @param {Array<{text: string, transform: number[], width: number}>} textItems
 *   Text items in reading order. `transform` is read as a six-element array
 *   `[a, b, c, d, tx, ty]`; only `c`, `d`, `tx` and `ty` are used.
 * @param {number} startOffset Index of the first matched character.
 * @param {number} length Number of matched characters.
 * @param {{height: number}} viewport Only `height` is read, to flip the y axis.
 * @returns {Array<{x: number, y: number, width: number, height: number}>}
 *   One rect per overlapping item; empty when nothing overlaps or when
 *   `length` is not a positive number.
 */
function calculateMatchRects(textItems, startOffset, length, viewport) {
  const rects = [];
  // An empty (or invalid) range has nothing to highlight.
  if (!(length > 0)) {
    return rects;
  }
  // Fraction of the rect height by which the rect is shifted down.
  // NOTE(review): presumably a baseline/descender adjustment — confirm with the renderer.
  const Y_SHIFT_RATIO = 0.15;
  const matchEnd = startOffset + length;
  let currentOffset = 0;
  for (const item of textItems) {
    const itemStart = currentOffset;
    // Offsets only grow, so no later item can overlap once we are past the match.
    if (itemStart >= matchEnd) {
      break;
    }
    const textLength = item.text.length;
    const itemEnd = itemStart + textLength;
    currentOffset = itemEnd;
    if (itemEnd > startOffset) {
      const [, , c, d, tx, ty] = item.transform;
      // Length of the (c, d) column of the transform is used as the rect height.
      const height = Math.hypot(c, d);
      const matchStartInItem = Math.max(0, startOffset - itemStart);
      const matchEndInItem = Math.min(textLength, matchEnd - itemStart);
      const charWidth = textLength > 0 ? item.width / textLength : item.width;
      rects.push({
        x: tx + charWidth * matchStartInItem,
        // viewport.height - ty flips the y axis; the rect is then raised by
        // its height and shifted back down by the ratio above.
        y: viewport.height - ty - height + height * Y_SHIFT_RATIO,
        width: charWidth * (matchEndInItem - matchStartInItem),
        height,
      });
    }
  }
  return rects;
}
1359
1376
  async function findTextOnPage(document2, pageNumber, query, options = {}) {
1360
1377
  const { caseSensitive = false, wholeWord = false } = options;
1361
1378
  if (!query || pageNumber < 1 || pageNumber > document2.numPages) {
1362
1379
  return [];
1363
1380
  }
1364
- const { fullText, charPositions } = await extractPageText(document2, pageNumber);
1381
+ const { fullText, textItems, viewport } = await extractPageText(document2, pageNumber);
1365
1382
  const matches = [];
1366
1383
  const searchText = caseSensitive ? query : query.toLowerCase();
1367
1384
  const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
@@ -1377,17 +1394,15 @@ async function findTextOnPage(document2, pageNumber, query, options = {}) {
1377
1394
  continue;
1378
1395
  }
1379
1396
  }
1380
- const matchRects = [];
1381
- for (let i = matchIndex; i < matchIndex + query.length && i < charPositions.length; i++) {
1382
- matchRects.push(charPositions[i].rect);
1397
+ const matchRects = calculateMatchRects(textItems, matchIndex, query.length, viewport);
1398
+ if (matchRects.length > 0) {
1399
+ matches.push({
1400
+ text: fullText.substring(matchIndex, matchIndex + query.length),
1401
+ rects: matchRects,
1402
+ pageNumber,
1403
+ startIndex: matchIndex
1404
+ });
1383
1405
  }
1384
- const mergedRects = mergeAdjacentRects(matchRects);
1385
- matches.push({
1386
- text: fullText.substring(matchIndex, matchIndex + query.length),
1387
- rects: mergedRects,
1388
- pageNumber,
1389
- startIndex: matchIndex
1390
- });
1391
1406
  startIndex = matchIndex + 1;
1392
1407
  }
1393
1408
  return matches;
@@ -1640,7 +1655,7 @@ function createSearchStore(initialOverrides = {}) {
1640
1655
  }
1641
1656
  }
1642
1657
  const matchText = pageText.substring(startIndex, startIndex + query.length);
1643
- const rects = calculateMatchRects(textItems, startIndex, query.length, viewport);
1658
+ const rects = calculateMatchRects2(textItems, startIndex, query.length, viewport);
1644
1659
  results.push({
1645
1660
  pageNumber: pageNum,
1646
1661
  matchIndex: matchIndex++,
@@ -1701,7 +1716,7 @@ function createSearchStore(initialOverrides = {}) {
1701
1716
  }
1702
1717
  }));
1703
1718
  }
1704
- function calculateMatchRects(textItems, startOffset, length, viewport) {
1719
+ function calculateMatchRects2(textItems, startOffset, length, viewport) {
1705
1720
  const rects = [];
1706
1721
  let currentOffset = 0;
1707
1722
  for (const item of textItems) {
@@ -1716,9 +1731,10 @@ function calculateMatchRects(textItems, startOffset, length, viewport) {
1716
1731
  const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
1717
1732
  const matchWidth = item.width / item.text.length * (matchEndInItem - matchStartInItem);
1718
1733
  const matchX = x + item.width / item.text.length * matchStartInItem;
1734
+ const yOffset = height * 0.15;
1719
1735
  rects.push({
1720
1736
  x: matchX,
1721
- y: y - height,
1737
+ y: y - height + yOffset,
1722
1738
  width: matchWidth,
1723
1739
  height
1724
1740
  });
@@ -9295,24 +9311,33 @@ function getSrcIdentifier(src) {
9295
9311
  const last = Array.from(data.slice(-4)).map((b) => b.toString(16).padStart(2, "0")).join("");
9296
9312
  return `binary:${len}:${first}:${last}`;
9297
9313
  }
9298
- function mergeRects2(rects) {
9299
- if (rects.length === 0) return [];
9300
- const sorted = [...rects].sort((a, b) => a.y - b.y || a.x - b.x);
9301
- const merged = [];
9302
- let current = { ...sorted[0] };
9303
- for (let i = 1; i < sorted.length; i++) {
9304
- const rect = sorted[i];
9305
- if (Math.abs(rect.y - current.y) < 2 && rect.x <= current.x + current.width + 2) {
9306
- const newRight = Math.max(current.x + current.width, rect.x + rect.width);
9307
- current.width = newRight - current.x;
9308
- current.height = Math.max(current.height, rect.height);
9309
- } else {
9310
- merged.push(current);
9311
- current = { ...rect };
9314
+ function calculateMatchRects3(textItems, startOffset, length, viewport) {
9315
+ const rects = [];
9316
+ let currentOffset = 0;
9317
+ for (const item of textItems) {
9318
+ const itemStart = currentOffset;
9319
+ const itemEnd = currentOffset + item.text.length;
9320
+ if (itemEnd > startOffset && itemStart < startOffset + length) {
9321
+ const [, , c, d, tx, ty] = item.transform;
9322
+ const x = tx;
9323
+ const y = viewport.height - ty;
9324
+ const height = Math.sqrt(c * c + d * d);
9325
+ const matchStartInItem = Math.max(0, startOffset - itemStart);
9326
+ const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
9327
+ const charWidth = item.text.length > 0 ? item.width / item.text.length : item.width;
9328
+ const matchWidth = charWidth * (matchEndInItem - matchStartInItem);
9329
+ const matchX = x + charWidth * matchStartInItem;
9330
+ const yOffset = height * 0.15;
9331
+ rects.push({
9332
+ x: matchX,
9333
+ y: y - height + yOffset,
9334
+ width: matchWidth,
9335
+ height
9336
+ });
9312
9337
  }
9338
+ currentOffset = itemEnd;
9313
9339
  }
9314
- merged.push(current);
9315
- return merged;
9340
+ return rects;
9316
9341
  }
9317
9342
  var import_react40, import_jsx_runtime26, PDFViewerInner, PDFViewerInnerWithRef, PDFViewerClient;
9318
9343
  var init_PDFViewerClient = __esm({
@@ -9411,26 +9436,15 @@ var init_PDFViewerClient = __esm({
9411
9436
  const textContent = await page.getTextContent();
9412
9437
  const viewport = page.getViewport({ scale: 1 });
9413
9438
  let fullText = "";
9414
- const charPositions = [];
9439
+ const textItems = [];
9415
9440
  for (const item of textContent.items) {
9416
9441
  if ("str" in item && item.str) {
9417
- const tx = item.transform;
9418
- const x = tx[4];
9419
- const y = viewport.height - tx[5];
9420
- const width = item.width ?? 0;
9421
- const height = item.height ?? 12;
9422
- const charWidth = item.str.length > 0 ? width / item.str.length : width;
9423
- for (let i = 0; i < item.str.length; i++) {
9424
- charPositions.push({
9425
- char: item.str[i],
9426
- rect: {
9427
- x: x + i * charWidth,
9428
- y: y - height,
9429
- width: charWidth,
9430
- height
9431
- }
9432
- });
9433
- }
9442
+ textItems.push({
9443
+ text: item.str,
9444
+ transform: item.transform,
9445
+ width: item.width ?? 0,
9446
+ height: item.height ?? 12
9447
+ });
9434
9448
  fullText += item.str;
9435
9449
  }
9436
9450
  }
@@ -9439,18 +9453,16 @@ var init_PDFViewerClient = __esm({
9439
9453
  while (true) {
9440
9454
  const matchIndex = textToSearch.indexOf(searchText, startIndex);
9441
9455
  if (matchIndex === -1) break;
9442
- const matchRects = [];
9443
- for (let i = matchIndex; i < matchIndex + text.length && i < charPositions.length; i++) {
9444
- matchRects.push(charPositions[i].rect);
9456
+ const matchRects = calculateMatchRects3(textItems, matchIndex, text.length, viewport);
9457
+ if (matchRects.length > 0) {
9458
+ const highlight = annotationStore.getState().addHighlight({
9459
+ pageNumber: pageNum,
9460
+ rects: matchRects,
9461
+ color,
9462
+ text: fullText.substring(matchIndex, matchIndex + text.length)
9463
+ });
9464
+ highlightIds.push(highlight.id);
9445
9465
  }
9446
- const mergedRects = mergeRects2(matchRects);
9447
- const highlight = annotationStore.getState().addHighlight({
9448
- pageNumber: pageNum,
9449
- rects: mergedRects,
9450
- color,
9451
- text: fullText.substring(matchIndex, matchIndex + text.length)
9452
- });
9453
- highlightIds.push(highlight.id);
9454
9466
  startIndex = matchIndex + 1;
9455
9467
  }
9456
9468
  } catch {
@@ -9628,33 +9640,22 @@ var init_PDFViewerClient = __esm({
9628
9640
  const textContent = await page.getTextContent();
9629
9641
  const viewport = page.getViewport({ scale: 1 });
9630
9642
  let fullText = "";
9631
- const charPositions = [];
9643
+ const textItems = [];
9632
9644
  for (const item of textContent.items) {
9633
9645
  if ("str" in item && item.str) {
9634
- const tx = item.transform;
9635
- const x = tx[4];
9636
- const y = viewport.height - tx[5];
9637
- const width = item.width ?? 0;
9638
- const height = item.height ?? 12;
9639
- const charWidth = item.str.length > 0 ? width / item.str.length : width;
9640
- for (let i = 0; i < item.str.length; i++) {
9641
- charPositions.push({
9642
- char: item.str[i],
9643
- rect: {
9644
- x: x + i * charWidth,
9645
- y: y - height,
9646
- width: charWidth,
9647
- height
9648
- }
9649
- });
9650
- }
9646
+ textItems.push({
9647
+ text: item.str,
9648
+ transform: item.transform,
9649
+ width: item.width ?? 0,
9650
+ height: item.height ?? 12
9651
+ });
9651
9652
  fullText += item.str;
9652
9653
  }
9653
9654
  }
9654
9655
  const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
9655
9656
  let startIndex = 0;
9656
9657
  while (true) {
9657
- let matchIndex = textToSearch.indexOf(searchText, startIndex);
9658
+ const matchIndex = textToSearch.indexOf(searchText, startIndex);
9658
9659
  if (matchIndex === -1) break;
9659
9660
  if (wholeWord) {
9660
9661
  const beforeChar = matchIndex > 0 ? textToSearch[matchIndex - 1] : " ";
@@ -9664,26 +9665,24 @@ var init_PDFViewerClient = __esm({
9664
9665
  continue;
9665
9666
  }
9666
9667
  }
9667
- const matchRects = [];
9668
- for (let i = matchIndex; i < matchIndex + query.length && i < charPositions.length; i++) {
9669
- matchRects.push(charPositions[i].rect);
9668
+ const matchRects = calculateMatchRects3(textItems, matchIndex, query.length, viewport);
9669
+ if (matchRects.length > 0) {
9670
+ const highlight = annotationStore.getState().addHighlight({
9671
+ pageNumber: pageNum,
9672
+ rects: matchRects,
9673
+ color,
9674
+ text: fullText.substring(matchIndex, matchIndex + query.length),
9675
+ source: "search"
9676
+ });
9677
+ result.matchCount++;
9678
+ result.highlightIds.push(highlight.id);
9679
+ result.matches.push({
9680
+ pageNumber: pageNum,
9681
+ text: fullText.substring(matchIndex, matchIndex + query.length),
9682
+ highlightId: highlight.id,
9683
+ rects: matchRects
9684
+ });
9670
9685
  }
9671
- const mergedRects = mergeRects2(matchRects);
9672
- const highlight = annotationStore.getState().addHighlight({
9673
- pageNumber: pageNum,
9674
- rects: mergedRects,
9675
- color,
9676
- text: fullText.substring(matchIndex, matchIndex + query.length),
9677
- source: "search"
9678
- });
9679
- result.matchCount++;
9680
- result.highlightIds.push(highlight.id);
9681
- result.matches.push({
9682
- pageNumber: pageNum,
9683
- text: fullText.substring(matchIndex, matchIndex + query.length),
9684
- highlightId: highlight.id,
9685
- rects: mergedRects
9686
- });
9687
9686
  startIndex = matchIndex + 1;
9688
9687
  }
9689
9688
  } catch {