natural-pdf 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +24 -40
- natural_pdf/core/page.py +17 -17
- natural_pdf/core/pdf.py +130 -12
- natural_pdf/elements/collections.py +229 -29
- natural_pdf/elements/region.py +2 -3
- natural_pdf/exporters/hocr.py +540 -0
- natural_pdf/exporters/hocr_font.py +142 -0
- natural_pdf/exporters/original_pdf.py +130 -0
- natural_pdf/exporters/searchable_pdf.py +3 -3
- natural_pdf/ocr/engine_surya.py +1 -1
- {natural_pdf-0.1.10.dist-info → natural_pdf-0.1.11.dist-info}/METADATA +1 -2
- {natural_pdf-0.1.10.dist-info → natural_pdf-0.1.11.dist-info}/RECORD +15 -12
- {natural_pdf-0.1.10.dist-info → natural_pdf-0.1.11.dist-info}/WHEEL +1 -1
- {natural_pdf-0.1.10.dist-info → natural_pdf-0.1.11.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.10.dist-info → natural_pdf-0.1.11.dist-info}/top_level.txt +0 -0
natural_pdf/__init__.py
CHANGED
@@ -37,72 +37,56 @@ def configure_logging(level=logging.INFO, handler=None):
|
|
37
37
|
logger.propagate = False
|
38
38
|
|
39
39
|
|
40
|
+
# Version
|
41
|
+
__version__ = "0.1.1"
|
42
|
+
|
43
|
+
# Core imports
|
44
|
+
from natural_pdf.collections.pdf_collection import PDFCollection
|
40
45
|
from natural_pdf.core.page import Page
|
41
46
|
from natural_pdf.core.pdf import PDF
|
42
47
|
from natural_pdf.elements.collections import ElementCollection
|
43
48
|
from natural_pdf.elements.region import Region
|
44
49
|
|
45
|
-
|
46
|
-
try:
|
47
|
-
from natural_pdf.qa import DocumentQA, get_qa_engine
|
48
|
-
|
49
|
-
HAS_QA = True
|
50
|
-
except ImportError:
|
51
|
-
HAS_QA = False
|
52
|
-
|
53
|
-
__version__ = "0.1.1"
|
54
|
-
|
55
|
-
__all__ = [
|
56
|
-
"PDF",
|
57
|
-
"PDFCollection",
|
58
|
-
"Page",
|
59
|
-
"Region",
|
60
|
-
"ElementCollection",
|
61
|
-
"TextSearchOptions",
|
62
|
-
"MultiModalSearchOptions",
|
63
|
-
"BaseSearchOptions",
|
64
|
-
"configure_logging",
|
65
|
-
]
|
66
|
-
|
67
|
-
if HAS_QA:
|
68
|
-
__all__.extend(["DocumentQA", "get_qa_engine"])
|
69
|
-
|
70
|
-
|
71
|
-
from .collections.pdf_collection import PDFCollection
|
72
|
-
|
73
|
-
# Core classes
|
74
|
-
from .core.pdf import PDF
|
75
|
-
from .elements.region import Region
|
50
|
+
ElementCollection = None
|
76
51
|
|
77
52
|
# Search options (if extras installed)
|
78
53
|
try:
|
79
|
-
from .search.search_options import BaseSearchOptions, MultiModalSearchOptions, TextSearchOptions
|
54
|
+
from natural_pdf.search.search_options import BaseSearchOptions, MultiModalSearchOptions, TextSearchOptions
|
80
55
|
except ImportError:
|
81
56
|
# Define dummy classes if extras not installed, so imports don't break
|
82
57
|
# but using them will raise the ImportError from check_haystack_availability
|
83
|
-
class
|
58
|
+
class BaseSearchOptions:
|
84
59
|
def __init__(self, *args, **kwargs):
|
85
60
|
pass
|
86
61
|
|
87
|
-
class
|
62
|
+
class TextSearchOptions:
|
88
63
|
def __init__(self, *args, **kwargs):
|
89
64
|
pass
|
90
65
|
|
91
|
-
class
|
66
|
+
class MultiModalSearchOptions:
|
92
67
|
def __init__(self, *args, **kwargs):
|
93
68
|
pass
|
94
69
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
70
|
+
# Import QA module if available
|
71
|
+
try:
|
72
|
+
from natural_pdf.qa import DocumentQA, get_qa_engine
|
73
|
+
HAS_QA = True
|
74
|
+
except ImportError:
|
75
|
+
HAS_QA = False
|
99
76
|
|
100
77
|
# Explicitly define what gets imported with 'from natural_pdf import *'
|
101
78
|
__all__ = [
|
102
79
|
"PDF",
|
103
80
|
"PDFCollection",
|
81
|
+
"Page",
|
104
82
|
"Region",
|
105
|
-
"
|
83
|
+
"ElementCollection",
|
84
|
+
"TextSearchOptions",
|
106
85
|
"MultiModalSearchOptions",
|
107
86
|
"BaseSearchOptions",
|
87
|
+
"configure_logging",
|
108
88
|
]
|
89
|
+
|
90
|
+
# Add QA components to __all__ if available
|
91
|
+
if HAS_QA:
|
92
|
+
__all__.extend(["DocumentQA", "get_qa_engine"])
|
natural_pdf/core/page.py
CHANGED
@@ -40,10 +40,10 @@ if TYPE_CHECKING:
|
|
40
40
|
from natural_pdf.elements.base import Element
|
41
41
|
from natural_pdf.elements.collections import ElementCollection
|
42
42
|
|
43
|
-
# New Imports
|
43
|
+
# # New Imports
|
44
44
|
import itertools
|
45
45
|
|
46
|
-
# Deskew Imports (Conditional)
|
46
|
+
# # Deskew Imports (Conditional)
|
47
47
|
import numpy as np
|
48
48
|
from pdfplumber.utils.geometry import get_bbox_overlap, merge_bboxes, objects_to_bbox
|
49
49
|
from pdfplumber.utils.text import TEXTMAP_KWARGS, WORD_EXTRACTOR_KWARGS, chars_to_textmap
|
@@ -55,7 +55,7 @@ from natural_pdf.analyzers.text_options import TextStyleOptions
|
|
55
55
|
from natural_pdf.analyzers.text_structure import TextStyleAnalyzer
|
56
56
|
from natural_pdf.classification.manager import ClassificationManager # For type hint
|
57
57
|
|
58
|
-
# --- Classification Imports --- #
|
58
|
+
# # --- Classification Imports --- #
|
59
59
|
from natural_pdf.classification.mixin import ClassificationMixin # Import classification mixin
|
60
60
|
from natural_pdf.core.element_manager import ElementManager
|
61
61
|
from natural_pdf.elements.base import Element # Import base element
|
@@ -66,7 +66,7 @@ from natural_pdf.ocr.utils import _apply_ocr_correction_to_elements
|
|
66
66
|
from natural_pdf.qa import DocumentQA, get_qa_engine
|
67
67
|
from natural_pdf.utils.locks import pdf_render_lock # Import the lock
|
68
68
|
|
69
|
-
# Import new utils
|
69
|
+
# # Import new utils
|
70
70
|
from natural_pdf.utils.text_extraction import filter_chars_spatially, generate_text_layout
|
71
71
|
from natural_pdf.widgets import InteractiveViewerWidget
|
72
72
|
from natural_pdf.widgets.viewer import _IPYWIDGETS_AVAILABLE, SimpleInteractiveViewerWidget
|
@@ -210,7 +210,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
210
210
|
|
211
211
|
def add_exclusion(
|
212
212
|
self,
|
213
|
-
exclusion_func_or_region: Union[Callable[["Page"], Region], Region, Any],
|
213
|
+
exclusion_func_or_region: Union[Callable[["Page"], "Region"], "Region", Any],
|
214
214
|
label: Optional[str] = None,
|
215
215
|
) -> "Page":
|
216
216
|
"""
|
@@ -274,7 +274,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
274
274
|
|
275
275
|
return self
|
276
276
|
|
277
|
-
def add_region(self, region: Region, name: Optional[str] = None) -> "Page":
|
277
|
+
def add_region(self, region: "Region", name: Optional[str] = None) -> "Page":
|
278
278
|
"""
|
279
279
|
Add a region to the page.
|
280
280
|
|
@@ -305,7 +305,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
305
305
|
|
306
306
|
return self
|
307
307
|
|
308
|
-
def add_regions(self, regions: List[Region], prefix: Optional[str] = None) -> "Page":
|
308
|
+
def add_regions(self, regions: List["Region"], prefix: Optional[str] = None) -> "Page":
|
309
309
|
"""
|
310
310
|
Add multiple regions to the page.
|
311
311
|
|
@@ -327,7 +327,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
327
327
|
|
328
328
|
return self
|
329
329
|
|
330
|
-
def _get_exclusion_regions(self, include_callable=True, debug=False) -> List[Region]:
|
330
|
+
def _get_exclusion_regions(self, include_callable=True, debug=False) -> List["Region"]:
|
331
331
|
"""
|
332
332
|
Get all exclusion regions for this page.
|
333
333
|
Assumes self._exclusions contains tuples of (callable/Region, label).
|
@@ -1349,7 +1349,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1349
1349
|
self._highlighter.clear_page(self.index)
|
1350
1350
|
return self
|
1351
1351
|
|
1352
|
-
def analyze_text_styles(self, options: Optional[TextStyleOptions] = None) -> ElementCollection:
|
1352
|
+
def analyze_text_styles(self, options: Optional[TextStyleOptions] = None) -> "ElementCollection":
|
1353
1353
|
"""
|
1354
1354
|
Analyze text elements by style, adding attributes directly to elements.
|
1355
1355
|
|
@@ -1520,7 +1520,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1520
1520
|
|
1521
1521
|
def _create_text_elements_from_ocr(
|
1522
1522
|
self, ocr_results: List[Dict[str, Any]], image_width=None, image_height=None
|
1523
|
-
) -> List[TextElement]:
|
1523
|
+
) -> List["TextElement"]:
|
1524
1524
|
"""DEPRECATED: Use self._element_mgr.create_text_elements_from_ocr"""
|
1525
1525
|
logger.warning(
|
1526
1526
|
"_create_text_elements_from_ocr is deprecated. Use self._element_mgr version."
|
@@ -1532,7 +1532,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1532
1532
|
def apply_ocr(
|
1533
1533
|
self,
|
1534
1534
|
engine: Optional[str] = None,
|
1535
|
-
options: Optional[OCROptions] = None,
|
1535
|
+
options: Optional["OCROptions"] = None,
|
1536
1536
|
languages: Optional[List[str]] = None,
|
1537
1537
|
min_confidence: Optional[float] = None,
|
1538
1538
|
device: Optional[str] = None,
|
@@ -1597,12 +1597,12 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1597
1597
|
def extract_ocr_elements(
|
1598
1598
|
self,
|
1599
1599
|
engine: Optional[str] = None,
|
1600
|
-
options: Optional[OCROptions] = None,
|
1600
|
+
options: Optional["OCROptions"] = None,
|
1601
1601
|
languages: Optional[List[str]] = None,
|
1602
1602
|
min_confidence: Optional[float] = None,
|
1603
1603
|
device: Optional[str] = None,
|
1604
1604
|
resolution: Optional[int] = None,
|
1605
|
-
) -> List[TextElement]:
|
1605
|
+
) -> List["TextElement"]:
|
1606
1606
|
"""
|
1607
1607
|
Extract text elements using OCR *without* adding them to the page's elements.
|
1608
1608
|
Uses the shared OCRManager instance.
|
@@ -1716,7 +1716,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1716
1716
|
return (self._page.width, self._page.height)
|
1717
1717
|
|
1718
1718
|
@property
|
1719
|
-
def layout_analyzer(self) -> LayoutAnalyzer:
|
1719
|
+
def layout_analyzer(self) -> "LayoutAnalyzer":
|
1720
1720
|
"""Get or create the layout analyzer for this page."""
|
1721
1721
|
if self._layout_analyzer is None:
|
1722
1722
|
if not self._layout_manager:
|
@@ -1728,7 +1728,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1728
1728
|
def analyze_layout(
|
1729
1729
|
self,
|
1730
1730
|
engine: Optional[str] = None,
|
1731
|
-
options: Optional[LayoutOptions] = None,
|
1731
|
+
options: Optional["LayoutOptions"] = None,
|
1732
1732
|
confidence: Optional[float] = None,
|
1733
1733
|
classes: Optional[List[str]] = None,
|
1734
1734
|
exclude_classes: Optional[List[str]] = None,
|
@@ -1736,7 +1736,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1736
1736
|
existing: str = "replace",
|
1737
1737
|
model_name: Optional[str] = None,
|
1738
1738
|
client: Optional[Any] = None, # Add client parameter
|
1739
|
-
) -> ElementCollection[Region]:
|
1739
|
+
) -> "ElementCollection[Region]":
|
1740
1740
|
"""
|
1741
1741
|
Analyze the page layout using the configured LayoutManager.
|
1742
1742
|
Adds detected Region objects to the page's element manager.
|
@@ -1813,7 +1813,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1813
1813
|
|
1814
1814
|
def get_section_between(
|
1815
1815
|
self, start_element=None, end_element=None, boundary_inclusion="both"
|
1816
|
-
) -> Optional[Region]: # Return Optional
|
1816
|
+
) -> Optional["Region"]: # Return Optional
|
1817
1817
|
"""
|
1818
1818
|
Get a section between two elements on this page.
|
1819
1819
|
"""
|
natural_pdf/core/pdf.py
CHANGED
@@ -60,6 +60,14 @@ except ImportError:
|
|
60
60
|
"Search dependencies are not installed. Install with: pip install natural-pdf[search]"
|
61
61
|
)
|
62
62
|
|
63
|
+
try:
|
64
|
+
from natural_pdf.exporters.searchable_pdf import create_searchable_pdf
|
65
|
+
except ImportError:
|
66
|
+
create_searchable_pdf = None
|
67
|
+
try:
|
68
|
+
from natural_pdf.exporters.original_pdf import create_original_pdf
|
69
|
+
except ImportError:
|
70
|
+
create_original_pdf = None
|
63
71
|
|
64
72
|
logger = logging.getLogger("natural_pdf.core.pdf")
|
65
73
|
tqdm = get_tqdm()
|
@@ -260,7 +268,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
260
268
|
return self
|
261
269
|
|
262
270
|
def add_exclusion(
|
263
|
-
self, exclusion_func: Callable[["Page"], Optional[Region]], label: str = None
|
271
|
+
self, exclusion_func: Callable[["Page"], Optional["Region"]], label: str = None
|
264
272
|
) -> "PDF":
|
265
273
|
"""
|
266
274
|
Add an exclusion function to the PDF. Text from these regions will be excluded from extraction.
|
@@ -468,7 +476,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
468
476
|
return self
|
469
477
|
|
470
478
|
def add_region(
|
471
|
-
self, region_func: Callable[["Page"], Optional[Region]], name: str = None
|
479
|
+
self, region_func: Callable[["Page"], Optional["Region"]], name: str = None
|
472
480
|
) -> "PDF":
|
473
481
|
"""
|
474
482
|
Add a region function to the PDF.
|
@@ -769,23 +777,133 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
769
777
|
|
770
778
|
def save_searchable(self, output_path: Union[str, "Path"], dpi: int = 300, **kwargs):
|
771
779
|
"""
|
780
|
+
DEPRECATED: Use save_pdf(..., ocr=True) instead.
|
772
781
|
Saves the PDF with an OCR text layer, making content searchable.
|
773
782
|
|
774
|
-
Requires optional dependencies. Install with: pip install "natural-pdf[ocr-
|
783
|
+
Requires optional dependencies. Install with: pip install \"natural-pdf[ocr-export]\"
|
775
784
|
|
776
785
|
Args:
|
777
786
|
output_path: Path to save the searchable PDF
|
778
787
|
dpi: Resolution for rendering and OCR overlay
|
779
788
|
**kwargs: Additional keyword arguments passed to the exporter
|
780
|
-
output_path: Path to save the searchable PDF
|
781
|
-
dpi: Resolution for rendering and OCR overlay
|
782
|
-
**kwargs: Additional keyword arguments passed to the exporter
|
783
789
|
"""
|
784
|
-
|
785
|
-
|
790
|
+
logger.warning(
|
791
|
+
"PDF.save_searchable() is deprecated. Use PDF.save_pdf(..., ocr=True) instead."
|
792
|
+
)
|
793
|
+
if create_searchable_pdf is None:
|
794
|
+
raise ImportError(
|
795
|
+
"Saving searchable PDF requires 'pikepdf' and 'Pillow'. "
|
796
|
+
"Install with: pip install \"natural-pdf[ocr-export]\""
|
797
|
+
)
|
786
798
|
output_path_str = str(output_path)
|
799
|
+
# Call the exporter directly, passing self (the PDF instance)
|
787
800
|
create_searchable_pdf(self, output_path_str, dpi=dpi, **kwargs)
|
788
|
-
|
801
|
+
# Logger info is handled within the exporter now
|
802
|
+
# logger.info(f"Searchable PDF saved to: {output_path_str}")
|
803
|
+
|
804
|
+
def save_pdf(
|
805
|
+
self,
|
806
|
+
output_path: Union[str, Path],
|
807
|
+
ocr: bool = False,
|
808
|
+
original: bool = False,
|
809
|
+
dpi: int = 300,
|
810
|
+
):
|
811
|
+
"""
|
812
|
+
Saves the PDF object (all its pages) to a new file.
|
813
|
+
|
814
|
+
Choose one saving mode:
|
815
|
+
- `ocr=True`: Creates a new, image-based PDF using OCR results from all pages.
|
816
|
+
Text generated during the natural-pdf session becomes searchable,
|
817
|
+
but original vector content is lost. Requires 'ocr-export' extras.
|
818
|
+
- `original=True`: Saves a copy of the original PDF file this object represents.
|
819
|
+
Any OCR results or analyses from the natural-pdf session are NOT included.
|
820
|
+
If the PDF was opened from an in-memory buffer, this mode may not be suitable.
|
821
|
+
Requires 'ocr-export' extras.
|
822
|
+
|
823
|
+
Args:
|
824
|
+
output_path: Path to save the new PDF file.
|
825
|
+
ocr: If True, save as a searchable, image-based PDF using OCR data.
|
826
|
+
original: If True, save the original source PDF content.
|
827
|
+
dpi: Resolution (dots per inch) used only when ocr=True.
|
828
|
+
|
829
|
+
Raises:
|
830
|
+
ValueError: If the PDF has no pages, if neither or both 'ocr'
|
831
|
+
and 'original' are True.
|
832
|
+
ImportError: If required libraries are not installed for the chosen mode.
|
833
|
+
RuntimeError: If an unexpected error occurs during saving.
|
834
|
+
"""
|
835
|
+
if not self.pages:
|
836
|
+
raise ValueError("Cannot save an empty PDF object.")
|
837
|
+
|
838
|
+
if not (ocr ^ original): # XOR: exactly one must be true
|
839
|
+
raise ValueError("Exactly one of 'ocr' or 'original' must be True.")
|
840
|
+
|
841
|
+
output_path_obj = Path(output_path)
|
842
|
+
output_path_str = str(output_path_obj)
|
843
|
+
|
844
|
+
if ocr:
|
845
|
+
if create_searchable_pdf is None:
|
846
|
+
raise ImportError(
|
847
|
+
"Saving with ocr=True requires 'pikepdf' and 'Pillow'. "
|
848
|
+
"Install with: pip install \"natural-pdf[ocr-export]\""
|
849
|
+
)
|
850
|
+
|
851
|
+
# Optional: Add warning about vector data loss similar to PageCollection
|
852
|
+
has_vector_elements = False
|
853
|
+
for page in self.pages:
|
854
|
+
if (hasattr(page, 'rects') and page.rects or
|
855
|
+
hasattr(page, 'lines') and page.lines or
|
856
|
+
hasattr(page, 'curves') and page.curves or
|
857
|
+
(hasattr(page, 'chars') and any(getattr(el, 'source', None) != 'ocr' for el in page.chars)) or
|
858
|
+
(hasattr(page, 'words') and any(getattr(el, 'source', None) != 'ocr' for el in page.words))):
|
859
|
+
has_vector_elements = True
|
860
|
+
break
|
861
|
+
if has_vector_elements:
|
862
|
+
logger.warning(
|
863
|
+
"Warning: Saving with ocr=True creates an image-based PDF. "
|
864
|
+
"Original vector elements (rects, lines, non-OCR text/chars) "
|
865
|
+
"will not be preserved in the output file."
|
866
|
+
)
|
867
|
+
|
868
|
+
logger.info(f"Saving searchable PDF (OCR text layer) to: {output_path_str}")
|
869
|
+
try:
|
870
|
+
# Delegate to the searchable PDF exporter, passing self (PDF instance)
|
871
|
+
create_searchable_pdf(self, output_path_str, dpi=dpi)
|
872
|
+
except Exception as e:
|
873
|
+
raise RuntimeError(f"Failed to create searchable PDF: {e}") from e
|
874
|
+
|
875
|
+
elif original:
|
876
|
+
if create_original_pdf is None:
|
877
|
+
raise ImportError(
|
878
|
+
"Saving with original=True requires 'pikepdf'. "
|
879
|
+
"Install with: pip install \"natural-pdf[ocr-export]\""
|
880
|
+
)
|
881
|
+
|
882
|
+
# Optional: Add warning about losing OCR data similar to PageCollection
|
883
|
+
has_ocr_elements = False
|
884
|
+
for page in self.pages:
|
885
|
+
if hasattr(page, 'find_all'):
|
886
|
+
ocr_text_elements = page.find_all("text[source=ocr]")
|
887
|
+
if ocr_text_elements:
|
888
|
+
has_ocr_elements = True
|
889
|
+
break
|
890
|
+
elif hasattr(page, 'words'): # Fallback
|
891
|
+
if any(getattr(el, 'source', None) == 'ocr' for el in page.words):
|
892
|
+
has_ocr_elements = True
|
893
|
+
break
|
894
|
+
if has_ocr_elements:
|
895
|
+
logger.warning(
|
896
|
+
"Warning: Saving with original=True preserves original page content. "
|
897
|
+
"OCR text generated in this session will not be included in the saved file."
|
898
|
+
)
|
899
|
+
|
900
|
+
logger.info(f"Saving original PDF content to: {output_path_str}")
|
901
|
+
try:
|
902
|
+
# Delegate to the original PDF exporter, passing self (PDF instance)
|
903
|
+
create_original_pdf(self, output_path_str)
|
904
|
+
except Exception as e:
|
905
|
+
# Re-raise exception from exporter
|
906
|
+
raise e
|
789
907
|
|
790
908
|
def ask(
|
791
909
|
self,
|
@@ -850,9 +968,9 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
850
968
|
|
851
969
|
def search_within_index(
|
852
970
|
self,
|
853
|
-
query: Union[str, Path, Image.Image, Region],
|
854
|
-
search_service: SearchServiceProtocol,
|
855
|
-
options: Optional[SearchOptions] = None,
|
971
|
+
query: Union[str, Path, Image.Image, "Region"],
|
972
|
+
search_service: "SearchServiceProtocol",
|
973
|
+
options: Optional["SearchOptions"] = None,
|
856
974
|
) -> List[Dict[str, Any]]:
|
857
975
|
"""
|
858
976
|
Finds relevant documents from this PDF within a search index.
|