npm - pdfjs-reader-core - Versions diffs - 0.2.0 → 0.2.1 - Mend

pdfjs-reader-core 0.2.0 → 0.2.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.cts CHANGED Viewed

@@ -2431,12 +2431,24 @@ interface CharPosition {
     char: string;
     rect: HighlightRect;
 }
+/** Internal text item representation */
+interface TextItem {
+    text: string;
+    transform: number[];
+    width: number;
+    height: number;
+}
 /**
- * Extract text content with character positions from a PDF page.
+ * Extract text content with text items from a PDF page.
+ * Uses proper text item tracking for accurate positioning.
  */
 declare function extractPageText(document: PDFDocumentProxy, pageNumber: number): Promise<{
     fullText: string;
-    charPositions: CharPosition[];
+    textItems: TextItem[];
+    viewport: {
+        width: number;
+        height: number;
+    };
 }>;
 /**
  * Find all occurrences of text on a specific page.

package/dist/index.d.ts CHANGED Viewed

@@ -2431,12 +2431,24 @@ interface CharPosition {
     char: string;
     rect: HighlightRect;
 }
+/** Internal text item representation */
+interface TextItem {
+    text: string;
+    transform: number[];
+    width: number;
+    height: number;
+}
 /**
- * Extract text content with character positions from a PDF page.
+ * Extract text content with text items from a PDF page.
+ * Uses proper text item tracking for accurate positioning.
  */
 declare function extractPageText(document: PDFDocumentProxy, pageNumber: number): Promise<{
     fullText: string;
-    charPositions: CharPosition[];
+    textItems: TextItem[];
+    viewport: {
+        width: number;
+        height: number;
+    };
 }>;
 /**
  * Find all occurrences of text on a specific page.

package/dist/index.js CHANGED Viewed

@@ -1307,37 +1307,54 @@ async function extractPageText(document2, pageNumber) {
   const textContent = await page.getTextContent();
   const viewport = page.getViewport({ scale: 1 });
   let fullText = "";
-  const charPositions = [];
+  const textItems = [];
   for (const item of textContent.items) {
     if ("str" in item && item.str) {
-      const tx = item.transform;
-      const x = tx[4];
-      const y = viewport.height - tx[5];
-      const width = item.width ?? 0;
-      const height = item.height ?? 12;
-      const charWidth = item.str.length > 0 ? width / item.str.length : width;
-      for (let i = 0; i < item.str.length; i++) {
-        charPositions.push({
-          char: item.str[i],
-          rect: {
-            x: x + i * charWidth,
-            y: y - height,
-            width: charWidth,
-            height
-          }
-        });
-      }
+      textItems.push({
+        text: item.str,
+        transform: item.transform,
+        width: item.width ?? 0,
+        height: item.height ?? 12
+      });
       fullText += item.str;
     }
   }
-  return { fullText, charPositions };
+  return { fullText, textItems, viewport };
+}
+function calculateMatchRects(textItems, startOffset, length, viewport) {
+  const rects = [];
+  let currentOffset = 0;
+  for (const item of textItems) {
+    const itemStart = currentOffset;
+    const itemEnd = currentOffset + item.text.length;
+    if (itemEnd > startOffset && itemStart < startOffset + length) {
+      const [, , c, d, tx, ty] = item.transform;
+      const x = tx;
+      const y = viewport.height - ty;
+      const height = Math.sqrt(c * c + d * d);
+      const matchStartInItem = Math.max(0, startOffset - itemStart);
+      const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
+      const charWidth = item.text.length > 0 ? item.width / item.text.length : item.width;
+      const matchWidth = charWidth * (matchEndInItem - matchStartInItem);
+      const matchX = x + charWidth * matchStartInItem;
+      const yOffset = height * 0.15;
+      rects.push({
+        x: matchX,
+        y: y - height + yOffset,
+        width: matchWidth,
+        height
+      });
+    }
+    currentOffset = itemEnd;
+  }
+  return rects;
 }
 async function findTextOnPage(document2, pageNumber, query, options = {}) {
   const { caseSensitive = false, wholeWord = false } = options;
   if (!query || pageNumber < 1 || pageNumber > document2.numPages) {
     return [];
   }
-  const { fullText, charPositions } = await extractPageText(document2, pageNumber);
+  const { fullText, textItems, viewport } = await extractPageText(document2, pageNumber);
   const matches = [];
   const searchText = caseSensitive ? query : query.toLowerCase();
   const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
@@ -1353,17 +1370,15 @@ async function findTextOnPage(document2, pageNumber, query, options = {}) {
         continue;
       }
     }
-    const matchRects = [];
-    for (let i = matchIndex; i < matchIndex + query.length && i < charPositions.length; i++) {
-      matchRects.push(charPositions[i].rect);
+    const matchRects = calculateMatchRects(textItems, matchIndex, query.length, viewport);
+    if (matchRects.length > 0) {
+      matches.push({
+        text: fullText.substring(matchIndex, matchIndex + query.length),
+        rects: matchRects,
+        pageNumber,
+        startIndex: matchIndex
+      });
     }
-    const mergedRects = mergeAdjacentRects(matchRects);
-    matches.push({
-      text: fullText.substring(matchIndex, matchIndex + query.length),
-      rects: mergedRects,
-      pageNumber,
-      startIndex: matchIndex
-    });
     startIndex = matchIndex + 1;
   }
   return matches;
@@ -1617,7 +1632,7 @@ function createSearchStore(initialOverrides = {}) {
               }
             }
             const matchText = pageText.substring(startIndex, startIndex + query.length);
-            const rects = calculateMatchRects(textItems, startIndex, query.length, viewport);
+            const rects = calculateMatchRects2(textItems, startIndex, query.length, viewport);
             results.push({
               pageNumber: pageNum,
               matchIndex: matchIndex++,
@@ -1678,7 +1693,7 @@ function createSearchStore(initialOverrides = {}) {
     }
   }));
 }
-function calculateMatchRects(textItems, startOffset, length, viewport) {
+function calculateMatchRects2(textItems, startOffset, length, viewport) {
   const rects = [];
   let currentOffset = 0;
   for (const item of textItems) {
@@ -1693,9 +1708,10 @@ function calculateMatchRects(textItems, startOffset, length, viewport) {
       const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
       const matchWidth = item.width / item.text.length * (matchEndInItem - matchStartInItem);
       const matchX = x + item.width / item.text.length * matchStartInItem;
+      const yOffset = height * 0.15;
       rects.push({
         x: matchX,
-        y: y - height,
+        y: y - height + yOffset,
         width: matchWidth,
         height
       });
@@ -9267,24 +9283,33 @@ function getSrcIdentifier(src) {
   const last = Array.from(data.slice(-4)).map((b) => b.toString(16).padStart(2, "0")).join("");
   return `binary:${len}:${first}:${last}`;
 }
-function mergeRects2(rects) {
-  if (rects.length === 0) return [];
-  const sorted = [...rects].sort((a, b) => a.y - b.y || a.x - b.x);
-  const merged = [];
-  let current = { ...sorted[0] };
-  for (let i = 1; i < sorted.length; i++) {
-    const rect = sorted[i];
-    if (Math.abs(rect.y - current.y) < 2 && rect.x <= current.x + current.width + 2) {
-      const newRight = Math.max(current.x + current.width, rect.x + rect.width);
-      current.width = newRight - current.x;
-      current.height = Math.max(current.height, rect.height);
-    } else {
-      merged.push(current);
-      current = { ...rect };
+function calculateMatchRects3(textItems, startOffset, length, viewport) {
+  const rects = [];
+  let currentOffset = 0;
+  for (const item of textItems) {
+    const itemStart = currentOffset;
+    const itemEnd = currentOffset + item.text.length;
+    if (itemEnd > startOffset && itemStart < startOffset + length) {
+      const [, , c, d, tx, ty] = item.transform;
+      const x = tx;
+      const y = viewport.height - ty;
+      const height = Math.sqrt(c * c + d * d);
+      const matchStartInItem = Math.max(0, startOffset - itemStart);
+      const matchEndInItem = Math.min(item.text.length, startOffset + length - itemStart);
+      const charWidth = item.text.length > 0 ? item.width / item.text.length : item.width;
+      const matchWidth = charWidth * (matchEndInItem - matchStartInItem);
+      const matchX = x + charWidth * matchStartInItem;
+      const yOffset = height * 0.15;
+      rects.push({
+        x: matchX,
+        y: y - height + yOffset,
+        width: matchWidth,
+        height
+      });
     }
+    currentOffset = itemEnd;
   }
-  merged.push(current);
-  return merged;
+  return rects;
 }
 var PDFViewerInner, PDFViewerInnerWithRef, PDFViewerClient;
 var init_PDFViewerClient = __esm({
@@ -9381,26 +9406,15 @@ var init_PDFViewerClient = __esm({
                 const textContent = await page.getTextContent();
                 const viewport = page.getViewport({ scale: 1 });
                 let fullText = "";
-                const charPositions = [];
+                const textItems = [];
                 for (const item of textContent.items) {
                   if ("str" in item && item.str) {
-                    const tx = item.transform;
-                    const x = tx[4];
-                    const y = viewport.height - tx[5];
-                    const width = item.width ?? 0;
-                    const height = item.height ?? 12;
-                    const charWidth = item.str.length > 0 ? width / item.str.length : width;
-                    for (let i = 0; i < item.str.length; i++) {
-                      charPositions.push({
-                        char: item.str[i],
-                        rect: {
-                          x: x + i * charWidth,
-                          y: y - height,
-                          width: charWidth,
-                          height
-                        }
-                      });
-                    }
+                    textItems.push({
+                      text: item.str,
+                      transform: item.transform,
+                      width: item.width ?? 0,
+                      height: item.height ?? 12
+                    });
                     fullText += item.str;
                   }
                 }
@@ -9409,18 +9423,16 @@ var init_PDFViewerClient = __esm({
                 while (true) {
                   const matchIndex = textToSearch.indexOf(searchText, startIndex);
                   if (matchIndex === -1) break;
-                  const matchRects = [];
-                  for (let i = matchIndex; i < matchIndex + text.length && i < charPositions.length; i++) {
-                    matchRects.push(charPositions[i].rect);
+                  const matchRects = calculateMatchRects3(textItems, matchIndex, text.length, viewport);
+                  if (matchRects.length > 0) {
+                    const highlight = annotationStore.getState().addHighlight({
+                      pageNumber: pageNum,
+                      rects: matchRects,
+                      color,
+                      text: fullText.substring(matchIndex, matchIndex + text.length)
+                    });
+                    highlightIds.push(highlight.id);
                   }
-                  const mergedRects = mergeRects2(matchRects);
-                  const highlight = annotationStore.getState().addHighlight({
-                    pageNumber: pageNum,
-                    rects: mergedRects,
-                    color,
-                    text: fullText.substring(matchIndex, matchIndex + text.length)
-                  });
-                  highlightIds.push(highlight.id);
                   startIndex = matchIndex + 1;
                 }
               } catch {
@@ -9598,33 +9610,22 @@ var init_PDFViewerClient = __esm({
                 const textContent = await page.getTextContent();
                 const viewport = page.getViewport({ scale: 1 });
                 let fullText = "";
-                const charPositions = [];
+                const textItems = [];
                 for (const item of textContent.items) {
                   if ("str" in item && item.str) {
-                    const tx = item.transform;
-                    const x = tx[4];
-                    const y = viewport.height - tx[5];
-                    const width = item.width ?? 0;
-                    const height = item.height ?? 12;
-                    const charWidth = item.str.length > 0 ? width / item.str.length : width;
-                    for (let i = 0; i < item.str.length; i++) {
-                      charPositions.push({
-                        char: item.str[i],
-                        rect: {
-                          x: x + i * charWidth,
-                          y: y - height,
-                          width: charWidth,
-                          height
-                        }
-                      });
-                    }
+                    textItems.push({
+                      text: item.str,
+                      transform: item.transform,
+                      width: item.width ?? 0,
+                      height: item.height ?? 12
+                    });
                     fullText += item.str;
                   }
                 }
                 const textToSearch = caseSensitive ? fullText : fullText.toLowerCase();
                 let startIndex = 0;
                 while (true) {
-                  let matchIndex = textToSearch.indexOf(searchText, startIndex);
+                  const matchIndex = textToSearch.indexOf(searchText, startIndex);
                   if (matchIndex === -1) break;
                   if (wholeWord) {
                     const beforeChar = matchIndex > 0 ? textToSearch[matchIndex - 1] : " ";
@@ -9634,26 +9635,24 @@ var init_PDFViewerClient = __esm({
                       continue;
                     }
                   }
-                  const matchRects = [];
-                  for (let i = matchIndex; i < matchIndex + query.length && i < charPositions.length; i++) {
-                    matchRects.push(charPositions[i].rect);
+                  const matchRects = calculateMatchRects3(textItems, matchIndex, query.length, viewport);
+                  if (matchRects.length > 0) {
+                    const highlight = annotationStore.getState().addHighlight({
+                      pageNumber: pageNum,
+                      rects: matchRects,
+                      color,
+                      text: fullText.substring(matchIndex, matchIndex + query.length),
+                      source: "search"
+                    });
+                    result.matchCount++;
+                    result.highlightIds.push(highlight.id);
+                    result.matches.push({
+                      pageNumber: pageNum,
+                      text: fullText.substring(matchIndex, matchIndex + query.length),
+                      highlightId: highlight.id,
+                      rects: matchRects
+                    });
                   }
-                  const mergedRects = mergeRects2(matchRects);
-                  const highlight = annotationStore.getState().addHighlight({
-                    pageNumber: pageNum,
-                    rects: mergedRects,
-                    color,
-                    text: fullText.substring(matchIndex, matchIndex + query.length),
-                    source: "search"
-                  });
-                  result.matchCount++;
-                  result.highlightIds.push(highlight.id);
-                  result.matches.push({
-                    pageNumber: pageNum,
-                    text: fullText.substring(matchIndex, matchIndex + query.length),
-                    highlightId: highlight.id,
-                    rects: mergedRects
-                  });
                   startIndex = matchIndex + 1;
                 }
               } catch {