natural-pdf 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +7 -2
- natural_pdf/analyzers/text_options.py +9 -1
- natural_pdf/analyzers/text_structure.py +371 -58
- natural_pdf/classification/manager.py +1 -1
- natural_pdf/core/element_manager.py +11 -1
- natural_pdf/core/highlighting_service.py +120 -40
- natural_pdf/core/page.py +4 -2
- natural_pdf/core/pdf.py +53 -38
- natural_pdf/elements/base.py +17 -0
- natural_pdf/elements/collections.py +203 -59
- natural_pdf/elements/region.py +43 -11
- natural_pdf/exporters/data/__init__.py +0 -0
- natural_pdf/exporters/data/pdf.ttf +0 -0
- natural_pdf/exporters/data/sRGB.icc +0 -0
- natural_pdf/exporters/hocr.py +40 -61
- natural_pdf/exporters/hocr_font.py +7 -13
- natural_pdf/exporters/original_pdf.py +10 -13
- natural_pdf/exporters/searchable_pdf.py +0 -10
- natural_pdf/search/__init__.py +65 -52
- natural_pdf/search/lancedb_search_service.py +325 -0
- natural_pdf/search/numpy_search_service.py +255 -0
- natural_pdf/search/searchable_mixin.py +25 -71
- natural_pdf/widgets/viewer.py +22 -31
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.12.dist-info}/METADATA +54 -49
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.12.dist-info}/RECORD +28 -25
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.12.dist-info}/WHEEL +1 -1
- natural_pdf/search/haystack_search_service.py +0 -687
- natural_pdf/search/haystack_utils.py +0 -474
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.12.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.12.dist-info}/top_level.txt +0 -0
@@ -611,13 +611,13 @@ class HighlightingService:
|
|
611
611
|
|
612
612
|
Args:
|
613
613
|
page_index: The 0-based index of the page to render.
|
614
|
-
scale: Scale factor for rendering highlights.
|
614
|
+
scale: Scale factor for rendering highlights if width/height/resolution not in kwargs.
|
615
615
|
labels: Whether to include a legend for highlights.
|
616
616
|
legend_position: Position of the legend.
|
617
617
|
render_ocr: Whether to render OCR text on the image.
|
618
|
-
resolution: Optional resolution (DPI) for the base page image.
|
619
|
-
Defaults to scale * 72.
|
620
|
-
kwargs: Additional keyword arguments for pdfplumber's page.to_image.
|
618
|
+
resolution: Optional resolution (DPI) for the base page image if width/height not in kwargs.
|
619
|
+
Defaults to scale * 72 if not otherwise specified.
|
620
|
+
kwargs: Additional keyword arguments for pdfplumber's page.to_image (e.g., width, height).
|
621
621
|
|
622
622
|
Returns:
|
623
623
|
A PIL Image object of the rendered page, or None if rendering fails.
|
@@ -626,34 +626,81 @@ class HighlightingService:
|
|
626
626
|
logger.error(f"Invalid page index {page_index} for rendering.")
|
627
627
|
return None
|
628
628
|
|
629
|
-
|
629
|
+
page_obj = self._pdf[page_index] # Renamed to avoid conflict
|
630
630
|
highlights_on_page = self.get_highlights_for_page(page_index)
|
631
631
|
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
632
|
+
to_image_args = kwargs.copy()
|
633
|
+
actual_scale_x = None
|
634
|
+
actual_scale_y = None
|
635
|
+
|
636
|
+
if "width" in to_image_args and to_image_args["width"] is not None:
|
637
|
+
logger.debug(f"Rendering page {page_index} with width={to_image_args['width']}.")
|
638
|
+
if "height" in to_image_args: to_image_args.pop("height", None)
|
639
|
+
# Actual scale will be calculated after image creation
|
640
|
+
elif "height" in to_image_args and to_image_args["height"] is not None:
|
641
|
+
logger.debug(f"Rendering page {page_index} with height={to_image_args['height']}.")
|
642
|
+
# Actual scale will be calculated after image creation
|
643
|
+
else:
|
644
|
+
# Use explicit resolution from kwargs if present, then the resolution param, then scale
|
645
|
+
render_resolution = to_image_args.pop("resolution", resolution) # Use and remove from kwargs if present
|
646
|
+
if render_resolution is None:
|
647
|
+
render_resolution = scale * 72
|
648
|
+
to_image_args["resolution"] = render_resolution # Add it back for the call
|
649
|
+
actual_scale_x = render_resolution / 72.0
|
650
|
+
actual_scale_y = render_resolution / 72.0
|
651
|
+
logger.debug(f"Rendering page {page_index} with resolution {render_resolution} (scale: {actual_scale_x:.2f}).")
|
652
|
+
|
653
|
+
try:
|
654
|
+
# base_image = render_plain_page(page_obj, actual_scale_x * 72 if actual_scale_x else scale * 72) # Old call
|
655
|
+
img_object = page_obj._page.to_image(**to_image_args)
|
656
|
+
base_image_pil = (
|
657
|
+
img_object.annotated
|
658
|
+
if hasattr(img_object, "annotated")
|
659
|
+
else img_object._repr_png_()
|
660
|
+
)
|
661
|
+
if isinstance(base_image_pil, bytes):
|
662
|
+
from io import BytesIO
|
663
|
+
base_image_pil = Image.open(BytesIO(base_image_pil))
|
664
|
+
base_image_pil = base_image_pil.convert("RGBA") # Ensure RGBA for renderer
|
665
|
+
logger.debug(
|
666
|
+
f"Base image for page {page_index} rendered. Size: {base_image_pil.size}."
|
667
|
+
)
|
668
|
+
|
669
|
+
if actual_scale_x is None or actual_scale_y is None: # If not set by resolution path
|
670
|
+
if page_obj.width > 0:
|
671
|
+
actual_scale_x = base_image_pil.width / page_obj.width
|
672
|
+
else:
|
673
|
+
actual_scale_x = scale # Fallback
|
674
|
+
if page_obj.height > 0:
|
675
|
+
actual_scale_y = base_image_pil.height / page_obj.height
|
676
|
+
else:
|
677
|
+
actual_scale_y = scale # Fallback
|
678
|
+
logger.debug(f"Calculated actual scales for page {page_index}: x={actual_scale_x:.2f}, y={actual_scale_y:.2f}")
|
679
|
+
|
680
|
+
except Exception as e:
|
681
|
+
logger.error(f"Error creating base image for page {page_index}: {e}", exc_info=True)
|
682
|
+
return None
|
683
|
+
|
684
|
+
renderer_scale = actual_scale_x # Assuming aspect ratio maintained, use x_scale
|
638
685
|
|
639
686
|
# --- Render Highlights ---
|
640
687
|
rendered_image: Image.Image
|
641
688
|
if highlights_on_page:
|
642
689
|
renderer = HighlightRenderer(
|
643
|
-
page=
|
644
|
-
base_image=
|
690
|
+
page=page_obj,
|
691
|
+
base_image=base_image_pil,
|
645
692
|
highlights=highlights_on_page,
|
646
|
-
scale=scale
|
693
|
+
scale=renderer_scale, # Use the determined actual scale
|
647
694
|
render_ocr=render_ocr,
|
648
695
|
)
|
649
696
|
rendered_image = renderer.render()
|
650
697
|
else:
|
651
698
|
if render_ocr:
|
652
|
-
# Still render OCR even if no highlights
|
653
|
-
renderer = HighlightRenderer(
|
699
|
+
# Still render OCR even if no highlights, using the determined actual scale
|
700
|
+
renderer = HighlightRenderer(page_obj, base_image_pil, [], renderer_scale, True)
|
654
701
|
rendered_image = renderer.render()
|
655
702
|
else:
|
656
|
-
rendered_image =
|
703
|
+
rendered_image = base_image_pil # No highlights, no OCR requested
|
657
704
|
|
658
705
|
# --- Add Legend (Based ONLY on this page's highlights) ---
|
659
706
|
if labels:
|
@@ -697,12 +744,12 @@ class HighlightingService:
|
|
697
744
|
Args:
|
698
745
|
page_index: Index of the page to render.
|
699
746
|
temporary_highlights: List of highlight data dicts (from ElementCollection._prepare).
|
700
|
-
scale:
|
747
|
+
scale: Original scale factor for rendering, used if width/height are not provided.
|
701
748
|
labels: Whether to include a legend.
|
702
749
|
legend_position: Position of the legend.
|
703
750
|
render_ocr: Whether to render OCR text.
|
704
|
-
resolution: Resolution for base page image rendering.
|
705
|
-
**kwargs: Additional args for pdfplumber's to_image.
|
751
|
+
resolution: Resolution for base page image rendering if width/height not used.
|
752
|
+
**kwargs: Additional args for pdfplumber's to_image (e.g., width, height).
|
706
753
|
|
707
754
|
Returns:
|
708
755
|
PIL Image of the preview, or None if rendering fails.
|
@@ -711,35 +758,64 @@ class HighlightingService:
|
|
711
758
|
logger.error(f"Invalid page index {page_index} for render_preview.")
|
712
759
|
return None
|
713
760
|
|
714
|
-
|
715
|
-
|
761
|
+
page_obj = self._pdf.pages[page_index]
|
762
|
+
|
763
|
+
to_image_args = kwargs.copy()
|
764
|
+
actual_scale_x = None
|
765
|
+
actual_scale_y = None
|
766
|
+
|
767
|
+
# Determine arguments for page._page.to_image()
|
768
|
+
if "width" in to_image_args and to_image_args["width"] is not None:
|
769
|
+
logger.debug(f"Rendering preview for page {page_index} with width={to_image_args['width']}.")
|
770
|
+
# Resolution is implicitly handled by pdfplumber when width is set
|
771
|
+
if "height" in to_image_args:
|
772
|
+
to_image_args.pop("height", None)
|
773
|
+
# after image is created, we will calculate actual_scale_x and actual_scale_y
|
774
|
+
|
775
|
+
elif "height" in to_image_args and to_image_args["height"] is not None:
|
776
|
+
logger.debug(f"Rendering preview for page {page_index} with height={to_image_args['height']}.")
|
777
|
+
# Resolution is implicitly handled by pdfplumber when height is set
|
778
|
+
# after image is created, we will calculate actual_scale_x and actual_scale_y
|
779
|
+
else:
|
780
|
+
# Neither width nor height is provided, use resolution or scale.
|
781
|
+
render_resolution = resolution if resolution is not None else scale * 72
|
782
|
+
to_image_args["resolution"] = render_resolution
|
783
|
+
actual_scale_x = render_resolution / 72.0
|
784
|
+
actual_scale_y = render_resolution / 72.0
|
785
|
+
logger.debug(f"Rendering preview for page {page_index} with resolution={render_resolution} (scale: {actual_scale_x:.2f}).")
|
716
786
|
|
717
787
|
try:
|
718
|
-
|
719
|
-
|
720
|
-
base_image = (
|
788
|
+
img_object = page_obj._page.to_image(**to_image_args)
|
789
|
+
base_image_pil = (
|
721
790
|
img_object.annotated
|
722
791
|
if hasattr(img_object, "annotated")
|
723
792
|
else img_object._repr_png_()
|
724
793
|
)
|
725
|
-
if isinstance(
|
794
|
+
if isinstance(base_image_pil, bytes):
|
726
795
|
from io import BytesIO
|
796
|
+
base_image_pil = Image.open(BytesIO(base_image_pil))
|
797
|
+
base_image_pil = base_image_pil.convert("RGB")
|
727
798
|
|
728
|
-
|
729
|
-
|
799
|
+
# If scale was not determined by resolution, calculate it now from base_image_pil dimensions
|
800
|
+
if actual_scale_x is None or actual_scale_y is None:
|
801
|
+
if page_obj.width > 0:
|
802
|
+
actual_scale_x = base_image_pil.width / page_obj.width
|
803
|
+
else:
|
804
|
+
actual_scale_x = scale # Fallback to original scale
|
805
|
+
if page_obj.height > 0:
|
806
|
+
actual_scale_y = base_image_pil.height / page_obj.height
|
807
|
+
else:
|
808
|
+
actual_scale_y = scale # Fallback to original scale
|
809
|
+
logger.debug(f"Calculated actual scales for page {page_index}: x={actual_scale_x:.2f}, y={actual_scale_y:.2f} from image size {base_image_pil.size} and page size ({page_obj.width}, {page_obj.height})")
|
730
810
|
|
731
811
|
# Convert temporary highlight dicts to Highlight objects
|
732
|
-
# Note: Colors/labels should be determined *here* for temporary preview
|
733
812
|
preview_highlights = []
|
734
813
|
for hl_data in temporary_highlights:
|
735
|
-
# Determine the final color using the service logic
|
736
814
|
final_color = self._determine_highlight_color(
|
737
815
|
color_input=hl_data.get("color"),
|
738
816
|
label=hl_data.get("label"),
|
739
817
|
use_color_cycling=hl_data.get("use_color_cycling", False),
|
740
818
|
)
|
741
|
-
|
742
|
-
# Extract potential attributes to draw
|
743
819
|
attrs_to_draw = {}
|
744
820
|
element = hl_data.get("element")
|
745
821
|
include_attrs = hl_data.get("include_attrs")
|
@@ -753,25 +829,29 @@ class HighlightingService:
|
|
753
829
|
logger.warning(
|
754
830
|
f"Attribute '{attr_name}' not found on element {element}"
|
755
831
|
)
|
756
|
-
|
757
|
-
# Add highlight if geometry exists
|
758
832
|
if hl_data.get("bbox") or hl_data.get("polygon"):
|
759
833
|
preview_highlights.append(
|
760
834
|
Highlight(
|
761
835
|
page_index=hl_data["page_index"],
|
762
836
|
bbox=hl_data.get("bbox"),
|
763
837
|
polygon=hl_data.get("polygon"),
|
764
|
-
color=final_color,
|
838
|
+
color=final_color,
|
765
839
|
label=hl_data.get("label"),
|
766
840
|
attributes=attrs_to_draw,
|
767
841
|
)
|
768
842
|
)
|
769
|
-
|
770
|
-
#
|
771
|
-
|
843
|
+
|
844
|
+
# Use the calculated actual_scale_x for the HighlightRenderer
|
845
|
+
# Assuming HighlightRenderer can handle a single scale or we adapt it.
|
846
|
+
# For now, pdfplumber usually maintains aspect ratio, so one scale should be okay.
|
847
|
+
# If not, HighlightRenderer needs to accept scale_x and scale_y.
|
848
|
+
# We will use actual_scale_x assuming aspect ratio is maintained by pdfplumber,
|
849
|
+
# or if not, it's a reasonable approximation for highlight scaling.
|
850
|
+
renderer_scale = actual_scale_x
|
851
|
+
|
852
|
+
renderer = HighlightRenderer(page_obj, base_image_pil, preview_highlights, renderer_scale, render_ocr)
|
772
853
|
rendered_image = renderer.render()
|
773
854
|
|
774
|
-
# Create legend only from temporary highlights
|
775
855
|
legend = None
|
776
856
|
if labels:
|
777
857
|
preview_labels = {h.label: h.color for h in preview_highlights if h.label}
|
@@ -781,7 +861,7 @@ class HighlightingService:
|
|
781
861
|
rendered_image, legend, position=legend_position
|
782
862
|
)
|
783
863
|
else:
|
784
|
-
final_image = rendered_image
|
864
|
+
final_image = rendered_image
|
785
865
|
else:
|
786
866
|
final_image = rendered_image
|
787
867
|
|
natural_pdf/core/page.py
CHANGED
@@ -1349,7 +1349,9 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
1349
1349
|
self._highlighter.clear_page(self.index)
|
1350
1350
|
return self
|
1351
1351
|
|
1352
|
-
def analyze_text_styles(
|
1352
|
+
def analyze_text_styles(
|
1353
|
+
self, options: Optional[TextStyleOptions] = None
|
1354
|
+
) -> "ElementCollection":
|
1353
1355
|
"""
|
1354
1356
|
Analyze text elements by style, adding attributes directly to elements.
|
1355
1357
|
|
@@ -2130,7 +2132,7 @@ class Page(ClassificationMixin, ExtractionMixin):
|
|
2130
2132
|
if not _IPYWIDGETS_AVAILABLE or SimpleInteractiveViewerWidget is None:
|
2131
2133
|
logger.error(
|
2132
2134
|
"Interactive viewer requires optional dependencies ('ipywidgets'). "
|
2133
|
-
"Install with `pip install natural-pdf[
|
2135
|
+
"Install with `pip install natural-pdf[viewer]`"
|
2134
2136
|
)
|
2135
2137
|
# raise ImportError("ipywidgets not found.") # Option 1: Raise error
|
2136
2138
|
return None # Option 2: Return None gracefully
|
natural_pdf/core/pdf.py
CHANGED
@@ -60,6 +60,7 @@ except ImportError:
|
|
60
60
|
"Search dependencies are not installed. Install with: pip install natural-pdf[search]"
|
61
61
|
)
|
62
62
|
|
63
|
+
|
63
64
|
try:
|
64
65
|
from natural_pdf.exporters.searchable_pdf import create_searchable_pdf
|
65
66
|
except ImportError:
|
@@ -791,10 +792,10 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
791
792
|
"PDF.save_searchable() is deprecated. Use PDF.save_pdf(..., ocr=True) instead."
|
792
793
|
)
|
793
794
|
if create_searchable_pdf is None:
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
795
|
+
raise ImportError(
|
796
|
+
"Saving searchable PDF requires 'pikepdf'. "
|
797
|
+
'Install with: pip install "natural-pdf[ocr-export]"'
|
798
|
+
)
|
798
799
|
output_path_str = str(output_path)
|
799
800
|
# Call the exporter directly, passing self (the PDF instance)
|
800
801
|
create_searchable_pdf(self, output_path_str, dpi=dpi, **kwargs)
|
@@ -842,55 +843,59 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
842
843
|
output_path_str = str(output_path_obj)
|
843
844
|
|
844
845
|
if ocr:
|
845
|
-
if create_searchable_pdf is None:
|
846
|
-
raise ImportError(
|
847
|
-
"Saving with ocr=True requires 'pikepdf' and 'Pillow'. "
|
848
|
-
"Install with: pip install \"natural-pdf[ocr-export]\""
|
849
|
-
)
|
850
|
-
|
851
|
-
# Optional: Add warning about vector data loss similar to PageCollection
|
852
846
|
has_vector_elements = False
|
853
847
|
for page in self.pages:
|
854
|
-
if (
|
855
|
-
hasattr(page,
|
856
|
-
|
857
|
-
|
858
|
-
|
848
|
+
if (
|
849
|
+
hasattr(page, "rects")
|
850
|
+
and page.rects
|
851
|
+
or hasattr(page, "lines")
|
852
|
+
and page.lines
|
853
|
+
or hasattr(page, "curves")
|
854
|
+
and page.curves
|
855
|
+
or (
|
856
|
+
hasattr(page, "chars")
|
857
|
+
and any(getattr(el, "source", None) != "ocr" for el in page.chars)
|
858
|
+
)
|
859
|
+
or (
|
860
|
+
hasattr(page, "words")
|
861
|
+
and any(getattr(el, "source", None) != "ocr" for el in page.words)
|
862
|
+
)
|
863
|
+
):
|
859
864
|
has_vector_elements = True
|
860
865
|
break
|
861
866
|
if has_vector_elements:
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
+
logger.warning(
|
868
|
+
"Warning: Saving with ocr=True creates an image-based PDF. "
|
869
|
+
"Original vector elements (rects, lines, non-OCR text/chars) "
|
870
|
+
"will not be preserved in the output file."
|
871
|
+
)
|
867
872
|
|
868
873
|
logger.info(f"Saving searchable PDF (OCR text layer) to: {output_path_str}")
|
869
874
|
try:
|
870
875
|
# Delegate to the searchable PDF exporter, passing self (PDF instance)
|
871
876
|
create_searchable_pdf(self, output_path_str, dpi=dpi)
|
872
877
|
except Exception as e:
|
873
|
-
|
878
|
+
raise RuntimeError(f"Failed to create searchable PDF: {e}") from e
|
874
879
|
|
875
880
|
elif original:
|
876
881
|
if create_original_pdf is None:
|
877
882
|
raise ImportError(
|
878
883
|
"Saving with original=True requires 'pikepdf'. "
|
879
|
-
|
884
|
+
'Install with: pip install "natural-pdf[ocr-export]"'
|
880
885
|
)
|
881
886
|
|
882
|
-
|
887
|
+
# Optional: Add warning about losing OCR data similar to PageCollection
|
883
888
|
has_ocr_elements = False
|
884
889
|
for page in self.pages:
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
890
|
+
if hasattr(page, "find_all"):
|
891
|
+
ocr_text_elements = page.find_all("text[source=ocr]")
|
892
|
+
if ocr_text_elements:
|
893
|
+
has_ocr_elements = True
|
894
|
+
break
|
895
|
+
elif hasattr(page, "words"): # Fallback
|
896
|
+
if any(getattr(el, "source", None) == "ocr" for el in page.words):
|
897
|
+
has_ocr_elements = True
|
898
|
+
break
|
894
899
|
if has_ocr_elements:
|
895
900
|
logger.warning(
|
896
901
|
"Warning: Saving with original=True preserves original page content. "
|
@@ -899,11 +904,11 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
899
904
|
|
900
905
|
logger.info(f"Saving original PDF content to: {output_path_str}")
|
901
906
|
try:
|
902
|
-
|
903
|
-
|
907
|
+
# Delegate to the original PDF exporter, passing self (PDF instance)
|
908
|
+
create_original_pdf(self, output_path_str)
|
904
909
|
except Exception as e:
|
905
|
-
|
906
|
-
|
910
|
+
# Re-raise exception from exporter
|
911
|
+
raise e
|
907
912
|
|
908
913
|
def ask(
|
909
914
|
self,
|
@@ -1227,6 +1232,16 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
1227
1232
|
"""Context manager exit."""
|
1228
1233
|
self.close()
|
1229
1234
|
|
1235
|
+
def __repr__(self) -> str:
|
1236
|
+
"""Return a string representation of the PDF object."""
|
1237
|
+
if not hasattr(self, "_pages"):
|
1238
|
+
page_count_str = "uninitialized"
|
1239
|
+
else:
|
1240
|
+
page_count_str = str(len(self._pages))
|
1241
|
+
|
1242
|
+
source_info = getattr(self, "source_path", "unknown source")
|
1243
|
+
return f"<PDF source='{source_info}' pages={page_count_str}>"
|
1244
|
+
|
1230
1245
|
def get_id(self) -> str:
|
1231
1246
|
"""Get unique identifier for this PDF."""
|
1232
1247
|
"""Get unique identifier for this PDF."""
|
@@ -1400,7 +1415,7 @@ class PDF(ExtractionMixin, ExportMixin, ClassificationMixin):
|
|
1400
1415
|
except ImportError:
|
1401
1416
|
raise ImportError(
|
1402
1417
|
"Classification dependencies missing. "
|
1403
|
-
'Install with: pip install "natural-pdf[
|
1418
|
+
'Install with: pip install "natural-pdf[core-ml]"'
|
1404
1419
|
)
|
1405
1420
|
raise ClassificationError("ClassificationManager not available.")
|
1406
1421
|
|
natural_pdf/elements/base.py
CHANGED
@@ -814,6 +814,7 @@ class Element(DirectionalMixin):
|
|
814
814
|
legend_position: str = "right",
|
815
815
|
color: Optional[Union[Tuple, str]] = "red", # Default color for single element
|
816
816
|
label: Optional[str] = None,
|
817
|
+
width: Optional[int] = None, # Add width parameter
|
817
818
|
) -> Optional["Image.Image"]:
|
818
819
|
"""
|
819
820
|
Show the page with only this element highlighted temporarily.
|
@@ -824,6 +825,7 @@ class Element(DirectionalMixin):
|
|
824
825
|
legend_position: Position of the legend
|
825
826
|
color: Color to highlight this element (default: red)
|
826
827
|
label: Optional label for this element in the legend
|
828
|
+
width: Optional width for the output image in pixels
|
827
829
|
|
828
830
|
Returns:
|
829
831
|
PIL Image of the page with only this element highlighted, or None if error.
|
@@ -861,6 +863,7 @@ class Element(DirectionalMixin):
|
|
861
863
|
page_index=self.page.index,
|
862
864
|
temporary_highlights=[temp_highlight_data],
|
863
865
|
scale=scale,
|
866
|
+
width=width, # Pass the width parameter
|
864
867
|
labels=labels,
|
865
868
|
legend_position=legend_position,
|
866
869
|
)
|
@@ -898,6 +901,7 @@ class Element(DirectionalMixin):
|
|
898
901
|
self,
|
899
902
|
*,
|
900
903
|
text: str,
|
904
|
+
contains: str = "all",
|
901
905
|
apply_exclusions: bool = True,
|
902
906
|
regex: bool = False,
|
903
907
|
case: bool = True,
|
@@ -909,6 +913,7 @@ class Element(DirectionalMixin):
|
|
909
913
|
self,
|
910
914
|
selector: str,
|
911
915
|
*,
|
916
|
+
contains: str = "all",
|
912
917
|
apply_exclusions: bool = True,
|
913
918
|
regex: bool = False,
|
914
919
|
case: bool = True,
|
@@ -920,6 +925,7 @@ class Element(DirectionalMixin):
|
|
920
925
|
selector: Optional[str] = None,
|
921
926
|
*,
|
922
927
|
text: Optional[str] = None,
|
928
|
+
contains: str = "all",
|
923
929
|
apply_exclusions: bool = True,
|
924
930
|
regex: bool = False,
|
925
931
|
case: bool = True,
|
@@ -934,6 +940,9 @@ class Element(DirectionalMixin):
|
|
934
940
|
Args:
|
935
941
|
selector: CSS-like selector string.
|
936
942
|
text: Text content to search for (equivalent to 'text:contains(...)').
|
943
|
+
contains: How to determine if elements are inside: 'all' (fully inside),
|
944
|
+
'any' (any overlap), or 'center' (center point inside).
|
945
|
+
(default: "all")
|
937
946
|
apply_exclusions: Whether to apply exclusion regions (default: True).
|
938
947
|
regex: Whether to use regex for text search (`selector` or `text`) (default: False).
|
939
948
|
case: Whether to do case-sensitive text search (`selector` or `text`) (default: True).
|
@@ -950,6 +959,7 @@ class Element(DirectionalMixin):
|
|
950
959
|
return temp_region.find(
|
951
960
|
selector=selector,
|
952
961
|
text=text,
|
962
|
+
contains=contains,
|
953
963
|
apply_exclusions=apply_exclusions,
|
954
964
|
regex=regex,
|
955
965
|
case=case,
|
@@ -961,6 +971,7 @@ class Element(DirectionalMixin):
|
|
961
971
|
self,
|
962
972
|
*,
|
963
973
|
text: str,
|
974
|
+
contains: str = "all",
|
964
975
|
apply_exclusions: bool = True,
|
965
976
|
regex: bool = False,
|
966
977
|
case: bool = True,
|
@@ -972,6 +983,7 @@ class Element(DirectionalMixin):
|
|
972
983
|
self,
|
973
984
|
selector: str,
|
974
985
|
*,
|
986
|
+
contains: str = "all",
|
975
987
|
apply_exclusions: bool = True,
|
976
988
|
regex: bool = False,
|
977
989
|
case: bool = True,
|
@@ -983,6 +995,7 @@ class Element(DirectionalMixin):
|
|
983
995
|
selector: Optional[str] = None,
|
984
996
|
*,
|
985
997
|
text: Optional[str] = None,
|
998
|
+
contains: str = "all",
|
986
999
|
apply_exclusions: bool = True,
|
987
1000
|
regex: bool = False,
|
988
1001
|
case: bool = True,
|
@@ -997,6 +1010,9 @@ class Element(DirectionalMixin):
|
|
997
1010
|
Args:
|
998
1011
|
selector: CSS-like selector string.
|
999
1012
|
text: Text content to search for (equivalent to 'text:contains(...)').
|
1013
|
+
contains: How to determine if elements are inside: 'all' (fully inside),
|
1014
|
+
'any' (any overlap), or 'center' (center point inside).
|
1015
|
+
(default: "all")
|
1000
1016
|
apply_exclusions: Whether to apply exclusion regions (default: True).
|
1001
1017
|
regex: Whether to use regex for text search (`selector` or `text`) (default: False).
|
1002
1018
|
case: Whether to do case-sensitive text search (`selector` or `text`) (default: True).
|
@@ -1013,6 +1029,7 @@ class Element(DirectionalMixin):
|
|
1013
1029
|
return temp_region.find_all(
|
1014
1030
|
selector=selector,
|
1015
1031
|
text=text,
|
1032
|
+
contains=contains,
|
1016
1033
|
apply_exclusions=apply_exclusions,
|
1017
1034
|
regex=regex,
|
1018
1035
|
case=case,
|