natural-pdf 0.1.40__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. natural_pdf/__init__.py +6 -7
  2. natural_pdf/analyzers/__init__.py +6 -1
  3. natural_pdf/analyzers/guides.py +354 -258
  4. natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
  5. natural_pdf/analyzers/layout/layout_manager.py +18 -4
  6. natural_pdf/analyzers/layout/paddle.py +11 -0
  7. natural_pdf/analyzers/layout/surya.py +2 -3
  8. natural_pdf/analyzers/shape_detection_mixin.py +25 -34
  9. natural_pdf/analyzers/text_structure.py +2 -2
  10. natural_pdf/classification/manager.py +1 -1
  11. natural_pdf/collections/mixins.py +3 -2
  12. natural_pdf/core/highlighting_service.py +743 -32
  13. natural_pdf/core/page.py +236 -383
  14. natural_pdf/core/page_collection.py +1249 -0
  15. natural_pdf/core/pdf.py +172 -83
  16. natural_pdf/{collections → core}/pdf_collection.py +18 -11
  17. natural_pdf/core/render_spec.py +335 -0
  18. natural_pdf/describe/base.py +1 -1
  19. natural_pdf/elements/__init__.py +1 -0
  20. natural_pdf/elements/base.py +108 -83
  21. natural_pdf/elements/{collections.py → element_collection.py} +566 -1487
  22. natural_pdf/elements/line.py +0 -1
  23. natural_pdf/elements/rect.py +0 -1
  24. natural_pdf/elements/region.py +318 -243
  25. natural_pdf/elements/text.py +9 -7
  26. natural_pdf/exporters/base.py +2 -2
  27. natural_pdf/exporters/original_pdf.py +1 -1
  28. natural_pdf/exporters/paddleocr.py +2 -4
  29. natural_pdf/exporters/searchable_pdf.py +3 -2
  30. natural_pdf/extraction/mixin.py +1 -3
  31. natural_pdf/flows/collections.py +1 -69
  32. natural_pdf/flows/element.py +4 -4
  33. natural_pdf/flows/flow.py +1200 -243
  34. natural_pdf/flows/region.py +707 -261
  35. natural_pdf/ocr/ocr_options.py +0 -2
  36. natural_pdf/ocr/utils.py +2 -1
  37. natural_pdf/qa/document_qa.py +21 -5
  38. natural_pdf/search/search_service_protocol.py +1 -1
  39. natural_pdf/selectors/parser.py +2 -2
  40. natural_pdf/tables/result.py +35 -1
  41. natural_pdf/text_mixin.py +7 -3
  42. natural_pdf/utils/debug.py +2 -1
  43. natural_pdf/utils/highlighting.py +1 -0
  44. natural_pdf/utils/layout.py +2 -2
  45. natural_pdf/utils/packaging.py +4 -3
  46. natural_pdf/utils/text_extraction.py +15 -12
  47. natural_pdf/utils/visualization.py +385 -0
  48. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/METADATA +7 -3
  49. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/RECORD +55 -53
  50. optimization/memory_comparison.py +1 -1
  51. optimization/pdf_analyzer.py +2 -2
  52. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/WHEEL +0 -0
  53. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/entry_points.txt +0 -0
  54. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/licenses/LICENSE +0 -0
  55. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
1
+ """Unified rendering infrastructure for natural-pdf.
2
+
3
+ This module provides the core components for the unified image generation system:
4
+ - RenderSpec: Data structure describing what to render
5
+ - Visualizable: Mixin providing show/render/export methods
6
+ """
7
+
8
+ import logging
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
12
+
13
+ if TYPE_CHECKING:
14
+ from PIL import Image as PIL_Image
15
+
16
+ from natural_pdf.core.page import Page
17
+ from natural_pdf.elements.base import Element
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class RenderSpec:
    """Specification for rendering a single page or region.

    This is the core data structure that unifies all rendering operations.
    Every visual object in natural-pdf converts its display requirements
    into one or more RenderSpecs, which are then processed by the
    unified rendering pipeline.

    Attributes:
        page: The page to render.
        crop_bbox: Optional bounding box (x0, y0, x1, y1) to crop to.
        highlights: List of highlight specifications. Each entry is a dict
            holding only the keys that were actually supplied, out of:
            - bbox or polygon: The geometry to highlight
            - color: Optional color for the highlight
            - label: Optional label text
            - element: Optional reference to the source element
    """

    page: "Page"
    crop_bbox: Optional[Tuple[float, float, float, float]] = None
    highlights: List[Dict[str, Any]] = field(default_factory=list)

    def add_highlight(
        self,
        bbox: Optional[Tuple[float, float, float, float]] = None,
        polygon: Optional[List[Tuple[float, float]]] = None,
        color: Optional[Union[str, Tuple[int, int, int]]] = None,
        label: Optional[str] = None,
        element: Optional["Element"] = None,
    ) -> None:
        """Append a highlight to this render spec.

        Explicit geometry wins. When neither ``bbox`` nor ``polygon`` is
        given, the geometry is taken from ``element``: its polygon if the
        element reports ``has_polygon`` and actually provides one,
        otherwise its ``bbox``.

        Args:
            bbox: Bounding box to highlight.
            polygon: Polygon points to highlight (alternative to bbox).
            color: Color for the highlight.
            label: Label text for the highlight.
            element: Source element reference.

        Raises:
            ValueError: If no geometry was supplied and none could be
                obtained from ``element``.
        """
        if bbox is None and polygon is None and element is not None:
            if getattr(element, "has_polygon", False):
                polygon = getattr(element, "polygon", None)
            if polygon is None:
                # Covers elements without a polygon as well as elements that
                # claim one but return None; the bbox is still usable then.
                bbox = getattr(element, "bbox", None)

        if bbox is None and polygon is None:
            raise ValueError("Must provide bbox, polygon, or element with geometry")

        candidate = {
            "bbox": bbox,
            "polygon": polygon,
            "color": color,
            "label": label,
            "element": element,
        }
        # Store only the keys that carry a value so consumers can rely on
        # key presence instead of checking for None.
        self.highlights.append(
            {key: value for key, value in candidate.items() if value is not None}
        )
86
+
87
+
88
class Visualizable:
    """Mixin class providing unified show/render/export methods.

    Classes that inherit from Visualizable need only implement
    _get_render_specs() to gain full image generation capabilities.
    """

    def _get_render_specs(
        self, mode: Literal["show", "render"] = "show", **kwargs
    ) -> List["RenderSpec"]:
        """Get render specifications for this object.

        This is the only method subclasses need to implement.
        It should return a list of RenderSpec objects describing
        what needs to be rendered.

        Args:
            mode: Rendering mode - 'show' includes highlights, 'render' is clean
            **kwargs: Additional parameters from show/render methods

        Returns:
            List of RenderSpec objects

        Raises:
            NotImplementedError: Always, unless overridden by the subclass.
        """
        raise NotImplementedError(f"{self.__class__.__name__} must implement _get_render_specs()")

    def _get_highlighter(self):
        """Get the highlighting service for rendering.

        Looks, in order, at ``self._highlighter``, ``self.page._highlighter``
        and the ``_highlighter`` of the first entry in ``self.pages``. A
        highlighter that is ``None`` is treated as missing so the search
        continues instead of handing ``None`` back to the caller.

        This method should be overridden by classes that have
        a different way of accessing the highlighter.

        Raises:
            RuntimeError: If no highlighter could be located.
        """
        own = getattr(self, "_highlighter", None)
        if own is not None:
            return own

        from_page = getattr(getattr(self, "page", None), "_highlighter", None)
        if from_page is not None:
            return from_page

        pages = getattr(self, "pages", None)
        if pages:
            # For collections, use first page's highlighter
            first_page = next(iter(pages))
            from_first = getattr(first_page, "_highlighter", None)
            if from_first is not None:
                return from_first

        raise RuntimeError(
            f"Cannot find HighlightingService for {self.__class__.__name__}. "
            "Override _get_highlighter() to provide access."
        )

    def show(
        self,
        *,
        # Basic rendering options
        resolution: Optional[float] = None,
        width: Optional[int] = None,
        # Highlight options
        color: Optional[Union[str, Tuple[int, int, int]]] = None,
        labels: bool = True,
        label_format: Optional[str] = None,
        highlights: Optional[List[Dict[str, Any]]] = None,
        legend_position: str = "right",
        annotate: Optional[Union[str, List[str]]] = None,
        # Layout options for multi-page/region
        layout: Literal["stack", "grid", "single"] = "stack",
        stack_direction: Literal["vertical", "horizontal"] = "vertical",
        gap: int = 5,
        columns: Optional[int] = None,  # For grid layout
        # Cropping options
        crop: Union[bool, Literal["content"]] = False,
        crop_bbox: Optional[Tuple[float, float, float, float]] = None,
        **kwargs,
    ) -> Optional["PIL_Image"]:
        """Generate a preview image with highlights.

        This method is for interactive debugging and visualization.
        Elements are highlighted to show what's selected or being worked with.

        Args:
            resolution: DPI for rendering (default from global settings)
            width: Target width in pixels (overrides resolution)
            color: Default highlight color
            labels: Whether to show labels for highlights
            label_format: Format string for labels (e.g., "Element {index}")
            highlights: Additional highlight groups to show
            legend_position: Position of legend/colorbar ('right', 'left', 'top', 'bottom')
            annotate: Attribute name(s) to display on highlights (string or list)
            layout: How to arrange multiple pages/regions
            stack_direction: Direction for stack layout
            gap: Pixels between stacked images
            columns: Number of columns for grid layout
            crop: Whether to crop (True, False, or 'content' for bbox of elements)
            crop_bbox: Explicit crop bounds
            **kwargs: Additional parameters, forwarded both to
                _get_render_specs() and to the highlighter's unified_render()

        Returns:
            PIL Image object or None if nothing to render
        """
        # A single attribute name is accepted as shorthand for a one-item list.
        if isinstance(annotate, str):
            annotate = [annotate]

        specs = self._get_render_specs(
            mode="show",
            color=color,
            highlights=highlights,
            crop=crop,
            crop_bbox=crop_bbox,
            annotate=annotate,
            **kwargs,
        )

        if not specs:
            logger.warning("%s.show() generated no render specs", self.__class__.__name__)
            return None

        highlighter = self._get_highlighter()
        return highlighter.unified_render(
            specs=specs,
            resolution=resolution,
            width=width,
            labels=labels,
            label_format=label_format,
            legend_position=legend_position,
            layout=layout,
            stack_direction=stack_direction,
            gap=gap,
            columns=columns,
            **kwargs,
        )

    def render(
        self,
        *,
        # Basic rendering options
        resolution: Optional[float] = None,
        width: Optional[int] = None,
        # Layout options for multi-page/region
        layout: Literal["stack", "grid", "single"] = "stack",
        stack_direction: Literal["vertical", "horizontal"] = "vertical",
        gap: int = 5,
        columns: Optional[int] = None,
        # Cropping options
        crop: Union[bool, Literal["content"]] = False,
        crop_bbox: Optional[Tuple[float, float, float, float]] = None,
        **kwargs,
    ) -> Optional["PIL_Image"]:
        """Generate a clean image without highlights.

        This method produces publication-ready images without
        any debugging annotations or highlights.

        Args:
            resolution: DPI for rendering (default from global settings)
            width: Target width in pixels (overrides resolution)
            layout: How to arrange multiple pages/regions
            stack_direction: Direction for stack layout
            gap: Pixels between stacked images
            columns: Number of columns for grid layout
            crop: Whether to crop
            crop_bbox: Explicit crop bounds
            **kwargs: Additional parameters, forwarded both to
                _get_render_specs() and to the highlighter's unified_render()

        Returns:
            PIL Image object or None if nothing to render
        """
        specs = self._get_render_specs(mode="render", crop=crop, crop_bbox=crop_bbox, **kwargs)

        if not specs:
            logger.warning("%s.render() generated no render specs", self.__class__.__name__)
            return None

        highlighter = self._get_highlighter()
        return highlighter.unified_render(
            specs=specs,
            resolution=resolution,
            width=width,
            labels=False,  # Never show labels in render mode
            layout=layout,
            stack_direction=stack_direction,
            gap=gap,
            columns=columns,
            **kwargs,
        )

    def export(
        self,
        path: Union[str, Path],
        *,
        # All the same options as render()
        resolution: Optional[float] = None,
        width: Optional[int] = None,
        layout: Literal["stack", "grid", "single"] = "stack",
        stack_direction: Literal["vertical", "horizontal"] = "vertical",
        gap: int = 5,
        columns: Optional[int] = None,
        crop: Union[bool, Literal["content"]] = False,
        crop_bbox: Optional[Tuple[float, float, float, float]] = None,
        format: Optional[str] = None,
        **kwargs,
    ) -> None:
        """Export a clean image to file.

        This is a convenience method that renders and saves in one step.

        Args:
            path: Output file path
            resolution: DPI for rendering
            width: Target width in pixels
            layout: How to arrange multiple pages/regions
            stack_direction: Direction for stack layout
            gap: Pixels between stacked images
            columns: Number of columns for grid layout
            crop: Whether to crop
            crop_bbox: Explicit crop bounds
            format: Image format, case-insensitive; "JPG" is accepted as an
                alias for "JPEG" (inferred from path if not specified)
            **kwargs: Additional parameters passed to rendering. The
                save-only options ``quality`` (JPEG, default 95) and
                ``compress_level`` (PNG, default 6) are consumed here and
                are not forwarded to render().

        Raises:
            ValueError: If rendering produced no image, or if no format was
                given and the path has no file extension to infer it from.
        """
        # These options configure image.save(); strip them before rendering
        # so they do not leak into _get_render_specs()/unified_render().
        quality = kwargs.pop("quality", 95)
        compress_level = kwargs.pop("compress_level", 6)

        image = self.render(
            resolution=resolution,
            width=width,
            layout=layout,
            stack_direction=stack_direction,
            gap=gap,
            columns=columns,
            crop=crop,
            crop_bbox=crop_bbox,
            **kwargs,
        )

        if image is None:
            raise ValueError(f"No image generated by {self.__class__.__name__}.render()")

        # Ensure path is a Path object
        path = Path(path)

        # Normalise explicit and inferred formats the same way, so that
        # format="jpg" behaves exactly like a ".jpg" path.
        image_format = (format or path.suffix.lstrip(".")).upper()
        if image_format == "JPG":
            image_format = "JPEG"
        if not image_format:
            raise ValueError(
                f"Cannot determine image format for {path}: "
                "pass format= or use a path with a file extension"
            )

        save_kwargs = {}
        if image_format == "JPEG":
            save_kwargs["quality"] = quality
        elif image_format == "PNG":
            save_kwargs["compress_level"] = compress_level

        image.save(path, format=image_format, **save_kwargs)
        logger.info("Exported %s to %s", self.__class__.__name__, path)
@@ -17,7 +17,7 @@ from .summary import ElementSummary, InspectionSummary
17
17
  if TYPE_CHECKING:
18
18
  from natural_pdf.core.page import Page
19
19
  from natural_pdf.elements.base import Element
20
- from natural_pdf.elements.collections import ElementCollection
20
+ from natural_pdf.elements.element_collection import ElementCollection
21
21
  from natural_pdf.elements.region import Region
22
22
 
23
23
  logger = logging.getLogger(__name__)
@@ -1,3 +1,4 @@
1
1
  """
2
2
  Element classes for Natural PDF.
3
+
3
4
  """
@@ -2,11 +2,12 @@
2
2
  Base Element class for natural-pdf.
3
3
  """
4
4
 
5
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, overload
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union, overload
6
6
 
7
7
  from PIL import Image
8
8
 
9
9
  from natural_pdf.classification.mixin import ClassificationMixin
10
+ from natural_pdf.core.render_spec import RenderSpec, Visualizable
10
11
  from natural_pdf.describe.mixin import DescribeMixin
11
12
 
12
13
  # Import selector parsing functions
@@ -15,7 +16,7 @@ from natural_pdf.selectors.parser import parse_selector, selector_to_filter_func
15
16
  if TYPE_CHECKING:
16
17
  from natural_pdf.classification.manager import ClassificationManager # noqa: F401
17
18
  from natural_pdf.core.page import Page
18
- from natural_pdf.elements.collections import ElementCollection
19
+ from natural_pdf.elements.element_collection import ElementCollection
19
20
  from natural_pdf.elements.region import Region
20
21
 
21
22
 
@@ -563,7 +564,56 @@ class DirectionalMixin:
563
564
  return matches[0]
564
565
 
565
566
 
566
- class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
567
class HighlightableMixin:
    """
    Mixin that provides the highlighting protocol for elements.

    This protocol enables ElementCollection.show() to work with mixed content
    including FlowRegions and elements from multiple pages by providing a
    standard way to get highlight specifications.
    """

    def get_highlight_specs(self) -> List[Dict[str, Any]]:
        """
        Get highlight specifications for this element.

        Returns a list of dictionaries, each containing:
        - page: The Page object to highlight on
        - page_index: The 0-based index of the page (0 if the page exposes
          no ``index`` attribute)
        - bbox: The bounding box (x0, y0, x1, y1) to highlight
        - polygon: Polygon coordinates for non-rectangular highlights; only
          present when the element reports ``has_polygon`` and provides one
        - element: Reference to the element being highlighted

        This default implementation returns a single spec, or an empty list
        when the element has no page or no bbox. Classes spanning several
        regions (e.g. FlowRegions) are expected to override it and return
        one spec per constituent region.

        Returns:
            List of highlight specification dictionaries
        """
        # Each attribute is read exactly once: bbox/polygon may be computed
        # properties, so a hasattr() check followed by a second access would
        # evaluate them twice.
        page = getattr(self, "page", None)
        if page is None:
            return []

        bbox = getattr(self, "bbox", None)
        if bbox is None:
            return []

        spec = {
            "page": page,
            "page_index": getattr(page, "index", 0),
            "bbox": bbox,
            "element": self,
        }

        if getattr(self, "has_polygon", False):
            polygon = getattr(self, "polygon", None)
            # Skip the key entirely rather than emitting polygon=None.
            if polygon is not None:
                spec["polygon"] = polygon

        return [spec]
612
+
613
+
614
+ class Element(
615
+ DirectionalMixin, ClassificationMixin, DescribeMixin, HighlightableMixin, Visualizable
616
+ ):
567
617
  """Base class for all PDF elements.
568
618
 
569
619
  This class provides common properties and methods for all PDF elements,
@@ -1024,7 +1074,7 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
1024
1074
  label: str = "",
1025
1075
  color: Optional[Tuple[float, float, float]] = None,
1026
1076
  use_color_cycling: bool = True,
1027
- include_attrs: Optional[List[str]] = None,
1077
+ annotate: Optional[List[str]] = None,
1028
1078
  existing: str = "append",
1029
1079
  ) -> "Element":
1030
1080
  """Highlight the element with the specified colour.
@@ -1042,7 +1092,7 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
1042
1092
  "label": label,
1043
1093
  "use_color_cycling": use_color_cycling,
1044
1094
  "element": self, # Pass the element itself so attributes can be accessed
1045
- "include_attrs": include_attrs,
1095
+ "annotate": annotate,
1046
1096
  "existing": existing,
1047
1097
  }
1048
1098
 
@@ -1056,84 +1106,67 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
1056
1106
 
1057
1107
  return self
1058
1108
 
1059
- def show(
1109
+ def _get_render_specs(
1060
1110
  self,
1061
- resolution: Optional[float] = None,
1062
- labels: bool = True,
1063
- legend_position: str = "right",
1064
- color: Optional[Union[Tuple, str]] = "red", # Default color for single element
1111
+ mode: Literal["show", "render"] = "show",
1112
+ color: Optional[Union[str, Tuple[int, int, int]]] = None,
1113
+ highlights: Optional[List[Dict[str, Any]]] = None,
1114
+ crop: Union[bool, Literal["content"]] = False,
1115
+ crop_bbox: Optional[Tuple[float, float, float, float]] = None,
1065
1116
  label: Optional[str] = None,
1066
- width: Optional[int] = None, # Add width parameter
1067
- crop: bool = False, # NEW: Crop to element bounds before legend
1068
- ) -> Optional["Image.Image"]:
1069
- """
1070
- Show the page with only this element highlighted temporarily.
1117
+ **kwargs,
1118
+ ) -> List[RenderSpec]:
1119
+ """Get render specifications for this element.
1071
1120
 
1072
1121
  Args:
1073
- resolution: Resolution in DPI for rendering (default: uses global options, fallback to 144 DPI)
1074
- labels: Whether to include a legend for the highlight
1075
- legend_position: Position of the legend
1076
- color: Color to highlight this element (default: red)
1077
- label: Optional label for this element in the legend
1078
- width: Optional width for the output image in pixels
1079
- crop: If True, crop the rendered image to this element's
1080
- bounding box before legends/overlays are added.
1122
+ mode: Rendering mode - 'show' includes highlights, 'render' is clean
1123
+ color: Color for highlighting this element in show mode
1124
+ highlights: Additional highlight groups to show
1125
+ crop: Whether to crop to element bounds
1126
+ crop_bbox: Explicit crop bounds
1127
+ label: Optional label for this element
1128
+ **kwargs: Additional parameters
1081
1129
 
1082
1130
  Returns:
1083
- PIL Image of the page with only this element highlighted, or None if error.
1131
+ List with single RenderSpec for this element's page
1084
1132
  """
1085
- # Apply global options as defaults
1086
- import natural_pdf
1087
-
1088
- if resolution is None:
1089
- if natural_pdf.options.image.resolution is not None:
1090
- resolution = natural_pdf.options.image.resolution
1091
- else:
1092
- resolution = 144 # Default resolution when none specified
1093
- if not hasattr(self, "page") or not self.page:
1094
- logger.warning(f"Cannot show element, missing 'page' attribute: {self}")
1095
- return None
1096
- if not hasattr(self.page, "_highlighter") or not self.page._highlighter:
1097
- logger.warning(f"Cannot show element, page lacks highlighter service: {self}")
1098
- return None
1099
-
1100
- service = self.page._highlighter
1101
-
1102
- # Determine the label if not provided
1103
- display_label = label if label is not None else f"{self.__class__.__name__}"
1104
-
1105
- # Prepare temporary highlight data for just this element
1106
- temp_highlight_data = {
1107
- "page_index": self.page.index,
1108
- "bbox": self.bbox if not self.has_polygon else None,
1109
- "polygon": self.polygon if self.has_polygon else None,
1110
- "color": color, # Use provided or default color
1111
- "label": display_label,
1112
- "use_color_cycling": False, # Explicitly false for single preview
1113
- }
1133
+ if not hasattr(self, "page") or self.page is None:
1134
+ return []
1135
+
1136
+ spec = RenderSpec(page=self.page)
1137
+
1138
+ # Handle cropping
1139
+ if crop_bbox:
1140
+ spec.crop_bbox = crop_bbox
1141
+ elif crop == "content" or crop is True:
1142
+ # Crop to element bounds
1143
+ if hasattr(self, "bbox") and self.bbox:
1144
+ spec.crop_bbox = self.bbox
1145
+
1146
+ # Add highlight in show mode
1147
+ if mode == "show":
1148
+ # Use provided label or generate one
1149
+ element_label = label if label is not None else self.__class__.__name__
1150
+
1151
+ spec.add_highlight(
1152
+ element=self,
1153
+ color=color or "red", # Default red for single element
1154
+ label=element_label,
1155
+ )
1114
1156
 
1115
- # Determine crop bbox
1116
- crop_bbox = self.bbox if crop else None
1157
+ # Add additional highlight groups if provided
1158
+ if highlights:
1159
+ for group in highlights:
1160
+ group_elements = group.get("elements", [])
1161
+ group_color = group.get("color", color)
1162
+ group_label = group.get("label")
1117
1163
 
1118
- # Check if we actually got geometry data
1119
- if temp_highlight_data["bbox"] is None and temp_highlight_data["polygon"] is None:
1120
- logger.warning(f"Cannot show element, failed to get bbox or polygon: {self}")
1121
- return None
1164
+ for elem in group_elements:
1165
+ # Only add if element is on same page
1166
+ if hasattr(elem, "page") and elem.page == self.page:
1167
+ spec.add_highlight(element=elem, color=group_color, label=group_label)
1122
1168
 
1123
- # Use render_preview to show only this highlight
1124
- try:
1125
- return service.render_preview(
1126
- page_index=self.page.index,
1127
- temporary_highlights=[temp_highlight_data],
1128
- resolution=resolution,
1129
- width=width, # Pass the width parameter
1130
- labels=labels,
1131
- legend_position=legend_position,
1132
- crop_bbox=crop_bbox,
1133
- )
1134
- except Exception as e:
1135
- logger.error(f"Error calling render_preview for element {self}: {e}", exc_info=True)
1136
- return None
1169
+ return [spec]
1137
1170
 
1138
1171
  def save(
1139
1172
  self,
@@ -1346,22 +1379,14 @@ class Element(DirectionalMixin, ClassificationMixin, DescribeMixin):
1346
1379
  resolution = kwargs.get("resolution", 150)
1347
1380
  from natural_pdf.elements.region import Region # Local import to avoid cycles
1348
1381
 
1349
- return self.expand().to_image(
1382
+ # Use render() for clean image without highlights
1383
+ return self.expand().render(
1350
1384
  resolution=resolution,
1351
- include_highlights=False,
1352
1385
  crop=True,
1353
1386
  )
1354
1387
  else:
1355
1388
  raise ValueError(f"Unsupported model_type for classification: {model_type}")
1356
1389
 
1357
- # ------------------------------------------------------------------
1358
- # Lightweight to_image proxy (vision models, previews, etc.)
1359
- # ------------------------------------------------------------------
1360
-
1361
- def to_image(self, *args, **kwargs): # type: ignore[override]
1362
- """Generate an image of this element by delegating to a temporary Region."""
1363
- return self.expand().to_image(*args, **kwargs)
1364
-
1365
1390
  # ------------------------------------------------------------------
1366
1391
  # Unified analysis storage (maps to metadata["analysis"])
1367
1392
  # ------------------------------------------------------------------