natural-pdf 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -222,7 +222,9 @@ class Region(
222
222
  mode: Literal["show", "render"] = "show",
223
223
  color: Optional[Union[str, Tuple[int, int, int]]] = None,
224
224
  highlights: Optional[Union[List[Dict[str, Any]], bool]] = None,
225
- crop: Union[bool, Literal["content"]] = True, # Default to True for regions
225
+ crop: Union[
226
+ bool, int, str, "Region", Literal["wide"]
227
+ ] = True, # Default to True for regions
226
228
  crop_bbox: Optional[Tuple[float, float, float, float]] = None,
227
229
  **kwargs,
228
230
  ) -> List[RenderSpec]:
@@ -232,7 +234,12 @@ class Region(
232
234
  mode: Rendering mode - 'show' includes highlights, 'render' is clean
233
235
  color: Color for highlighting this region in show mode
234
236
  highlights: Additional highlight groups to show, or False to disable all highlights
235
- crop: Whether to crop to this region
237
+ crop: Cropping mode:
238
+ - False: No cropping
239
+ - True: Crop to region bounds (default for regions)
240
+ - int: Padding in pixels around region
241
+ - 'wide': Full page width, cropped vertically to region
242
+ - Region: Crop to the bounds of another region
236
243
  crop_bbox: Explicit crop bounds (overrides region bounds)
237
244
  **kwargs: Additional parameters
238
245
 
@@ -247,15 +254,34 @@ class Region(
247
254
  if crop_bbox:
248
255
  spec.crop_bbox = crop_bbox
249
256
  elif crop:
250
- # Crop to this region's bounds
251
- spec.crop_bbox = self.bbox
257
+ x0, y0, x1, y1 = self.bbox
258
+
259
+ if crop is True:
260
+ # Crop to region bounds
261
+ spec.crop_bbox = self.bbox
262
+ elif isinstance(crop, (int, float)):
263
+ # Add padding around region
264
+ padding = float(crop)
265
+ spec.crop_bbox = (
266
+ max(0, x0 - padding),
267
+ max(0, y0 - padding),
268
+ min(self.page.width, x1 + padding),
269
+ min(self.page.height, y1 + padding),
270
+ )
271
+ elif crop == "wide":
272
+ # Full page width, cropped vertically to region
273
+ spec.crop_bbox = (0, y0, self.page.width, y1)
274
+ elif hasattr(crop, "bbox"):
275
+ # Crop to another region's bounds
276
+ spec.crop_bbox = crop.bbox
252
277
 
253
278
  # Add highlights in show mode (unless explicitly disabled with highlights=False)
254
279
  if mode == "show" and highlights is not False:
255
280
  # Only highlight this region if:
256
281
  # 1. We're not cropping, OR
257
- # 2. We're cropping but color was explicitly specified
258
- if not crop or color is not None:
282
+ # 2. We're cropping but color was explicitly specified, OR
283
+ # 3. We're cropping to another region (not tight crop)
284
+ if not crop or color is not None or (crop and not isinstance(crop, bool)):
259
285
  spec.add_highlight(
260
286
  bbox=self.bbox,
261
287
  polygon=self.polygon if self.has_polygon else None,
@@ -1237,6 +1263,8 @@ class Region(
1237
1263
  Union[str, Callable[[str], bool], List[str]]
1238
1264
  ] = None, # NEW: Content filtering
1239
1265
  apply_exclusions: bool = True, # Whether to apply exclusion regions during extraction
1266
+ verticals: Optional[List] = None, # Explicit vertical lines
1267
+ horizontals: Optional[List] = None, # Explicit horizontal lines
1240
1268
  ) -> TableResult: # Return type allows Optional[str] for cells
1241
1269
  """
1242
1270
  Extract a table from this region.
@@ -1263,6 +1291,10 @@ class Region(
1263
1291
  Works with all extraction methods by filtering cell content.
1264
1292
  apply_exclusions: Whether to apply exclusion regions during text extraction (default: True).
1265
1293
  When True, text within excluded regions (e.g., headers/footers) will not be extracted.
1294
+ verticals: Optional list of explicit vertical lines for table extraction. When provided,
1295
+ automatically sets vertical_strategy='explicit' and explicit_vertical_lines.
1296
+ horizontals: Optional list of explicit horizontal lines for table extraction. When provided,
1297
+ automatically sets horizontal_strategy='explicit' and explicit_horizontal_lines.
1266
1298
 
1267
1299
  Returns:
1268
1300
  Table data as a list of rows, where each row is a list of cell values (str or None).
@@ -1273,6 +1305,14 @@ class Region(
1273
1305
  if text_options is None:
1274
1306
  text_options = {} # Initialize empty dict
1275
1307
 
1308
+ # Handle explicit vertical and horizontal lines
1309
+ if verticals is not None:
1310
+ table_settings["vertical_strategy"] = "explicit"
1311
+ table_settings["explicit_vertical_lines"] = verticals
1312
+ if horizontals is not None:
1313
+ table_settings["horizontal_strategy"] = "explicit"
1314
+ table_settings["explicit_horizontal_lines"] = horizontals
1315
+
1276
1316
  # Auto-detect method if not specified
1277
1317
  if method is None:
1278
1318
  # If this is a TATR-detected region, use TATR method
@@ -2547,7 +2587,13 @@ class Region(
2547
2587
 
2548
2588
  return self
2549
2589
 
2550
- def get_section_between(self, start_element=None, end_element=None, include_boundaries="both"):
2590
+ def get_section_between(
2591
+ self,
2592
+ start_element=None,
2593
+ end_element=None,
2594
+ include_boundaries="both",
2595
+ orientation="vertical",
2596
+ ):
2551
2597
  """
2552
2598
  Get a section between two elements within this region.
2553
2599
 
@@ -2555,6 +2601,7 @@ class Region(
2555
2601
  start_element: Element marking the start of the section
2556
2602
  end_element: Element marking the end of the section
2557
2603
  include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none'
2604
+ orientation: 'vertical' (default) or 'horizontal' - determines section direction
2558
2605
 
2559
2606
  Returns:
2560
2607
  Region representing the section
@@ -2599,41 +2646,67 @@ class Region(
2599
2646
  else:
2600
2647
  end_element = elements[-1] # Default end is last element
2601
2648
 
2602
- # Adjust indexes based on boundary inclusion
2603
- start_element_for_bbox = start_element
2604
- end_element_for_bbox = end_element
2605
-
2606
- if include_boundaries == "none":
2607
- start_idx += 1
2608
- end_idx -= 1
2609
- start_element_for_bbox = elements[start_idx] if start_idx <= end_idx else None
2610
- end_element_for_bbox = elements[end_idx] if start_idx <= end_idx else None
2611
- elif include_boundaries == "start":
2612
- end_idx -= 1
2613
- end_element_for_bbox = elements[end_idx] if start_idx <= end_idx else None
2614
- elif include_boundaries == "end":
2615
- start_idx += 1
2616
- start_element_for_bbox = elements[start_idx] if start_idx <= end_idx else None
2617
-
2618
- # Ensure valid indexes
2619
- start_idx = max(0, start_idx)
2620
- end_idx = min(len(elements) - 1, end_idx)
2621
-
2622
- # If no valid elements in range, return empty region
2623
- if start_idx > end_idx or start_element_for_bbox is None or end_element_for_bbox is None:
2624
- logger.debug("No valid elements in range for get_section_between.")
2625
- # Return an empty region positioned at the start element boundary
2626
- anchor = start_element if start_element else self
2627
- return Region(self.page, (anchor.x0, anchor.top, anchor.x0, anchor.top))
2628
-
2629
- # Get elements in range based on adjusted indices
2630
- section_elements = elements[start_idx : end_idx + 1]
2631
-
2632
- # Create bounding box around the ELEMENTS included based on indices
2633
- x0 = min(e.x0 for e in section_elements)
2634
- top = min(e.top for e in section_elements)
2635
- x1 = max(e.x1 for e in section_elements)
2636
- bottom = max(e.bottom for e in section_elements)
2649
+ # Validate orientation parameter
2650
+ if orientation not in ["vertical", "horizontal"]:
2651
+ raise ValueError(f"orientation must be 'vertical' or 'horizontal', got '{orientation}'")
2652
+
2653
+ # Calculate the section boundaries based on orientation and include_boundaries
2654
+ if orientation == "vertical":
2655
+ # Use full width of the parent region for vertical sections
2656
+ x0 = self.x0 # Use parent region's left boundary
2657
+ x1 = self.x1 # Use parent region's right boundary
2658
+
2659
+ # Determine vertical boundaries based on include_boundaries
2660
+ if include_boundaries == "both":
2661
+ # Include both boundary elements
2662
+ top = start_element.top
2663
+ bottom = end_element.bottom
2664
+ elif include_boundaries == "start":
2665
+ # Include start element, exclude end element
2666
+ top = start_element.top
2667
+ bottom = end_element.top # Stop at the top of end element
2668
+ elif include_boundaries == "end":
2669
+ # Exclude start element, include end element
2670
+ top = start_element.bottom # Start at the bottom of start element
2671
+ bottom = end_element.bottom
2672
+ else: # "none"
2673
+ # Exclude both boundary elements
2674
+ top = start_element.bottom # Start at the bottom of start element
2675
+ bottom = end_element.top # Stop at the top of end element
2676
+
2677
+ # Ensure valid boundaries
2678
+ if top >= bottom:
2679
+ logger.debug(f"Invalid section boundaries: top={top} >= bottom={bottom}")
2680
+ # Return an empty region
2681
+ return Region(self.page, (x0, top, x0, top))
2682
+ else: # horizontal
2683
+ # Use full height of the parent region for horizontal sections
2684
+ top = self.top # Use parent region's top boundary
2685
+ bottom = self.bottom # Use parent region's bottom boundary
2686
+
2687
+ # Determine horizontal boundaries based on include_boundaries
2688
+ if include_boundaries == "both":
2689
+ # Include both boundary elements
2690
+ x0 = start_element.x0
2691
+ x1 = end_element.x1
2692
+ elif include_boundaries == "start":
2693
+ # Include start element, exclude end element
2694
+ x0 = start_element.x0
2695
+ x1 = end_element.x0 # Stop at the left of end element
2696
+ elif include_boundaries == "end":
2697
+ # Exclude start element, include end element
2698
+ x0 = start_element.x1 # Start at the right of start element
2699
+ x1 = end_element.x1
2700
+ else: # "none"
2701
+ # Exclude both boundary elements
2702
+ x0 = start_element.x1 # Start at the right of start element
2703
+ x1 = end_element.x0 # Stop at the left of end element
2704
+
2705
+ # Ensure valid boundaries
2706
+ if x0 >= x1:
2707
+ logger.debug(f"Invalid section boundaries: x0={x0} >= x1={x1}")
2708
+ # Return an empty region
2709
+ return Region(self.page, (x0, top, x0, top))
2637
2710
 
2638
2711
  # Create new region
2639
2712
  section = Region(self.page, (x0, top, x1, bottom))
@@ -2644,7 +2717,11 @@ class Region(
2644
2717
  return section
2645
2718
 
2646
2719
  def get_sections(
2647
- self, start_elements=None, end_elements=None, include_boundaries="both"
2720
+ self,
2721
+ start_elements=None,
2722
+ end_elements=None,
2723
+ include_boundaries="both",
2724
+ orientation="vertical",
2648
2725
  ) -> "ElementCollection[Region]":
2649
2726
  """
2650
2727
  Get sections within this region based on start/end elements.
@@ -2653,6 +2730,7 @@ class Region(
2653
2730
  start_elements: Elements or selector string that mark the start of sections
2654
2731
  end_elements: Elements or selector string that mark the end of sections
2655
2732
  include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none'
2733
+ orientation: 'vertical' (default) or 'horizontal' - determines section direction
2656
2734
 
2657
2735
  Returns:
2658
2736
  List of Region objects representing the extracted sections
@@ -2687,9 +2765,12 @@ class Region(
2687
2765
  if not start_elements:
2688
2766
  return []
2689
2767
 
2690
- # Sort all elements within the region in reading order
2768
+ # Sort all elements within the region based on orientation
2691
2769
  all_elements_in_region = self.get_elements()
2692
- all_elements_in_region.sort(key=lambda e: (e.top, e.x0))
2770
+ if orientation == "vertical":
2771
+ all_elements_in_region.sort(key=lambda e: (e.top, e.x0))
2772
+ else: # horizontal
2773
+ all_elements_in_region.sort(key=lambda e: (e.x0, e.top))
2693
2774
 
2694
2775
  if not all_elements_in_region:
2695
2776
  return [] # Cannot create sections if region is empty
@@ -2731,7 +2812,9 @@ class Region(
2731
2812
  start_element = current_start_boundary["element"]
2732
2813
  end_element = boundary["element"]
2733
2814
  # Use the helper, ensuring elements are from within the region
2734
- section = self.get_section_between(start_element, end_element, include_boundaries)
2815
+ section = self.get_section_between(
2816
+ start_element, end_element, include_boundaries, orientation
2817
+ )
2735
2818
  sections.append(section)
2736
2819
  current_start_boundary = None # Reset
2737
2820
 
@@ -2748,7 +2831,7 @@ class Region(
2748
2831
  if end_idx >= 0 and end_idx >= current_start_boundary["index"]:
2749
2832
  end_element = all_elements_in_region[end_idx]
2750
2833
  section = self.get_section_between(
2751
- start_element, end_element, include_boundaries
2834
+ start_element, end_element, include_boundaries, orientation
2752
2835
  )
2753
2836
  sections.append(section)
2754
2837
  # Else: Section started and ended by consecutive start elements? Create empty?
@@ -2762,7 +2845,9 @@ class Region(
2762
2845
  start_element = current_start_boundary["element"]
2763
2846
  # End at the last element within the region
2764
2847
  end_element = all_elements_in_region[-1]
2765
- section = self.get_section_between(start_element, end_element, include_boundaries)
2848
+ section = self.get_section_between(
2849
+ start_element, end_element, include_boundaries, orientation
2850
+ )
2766
2851
  sections.append(section)
2767
2852
 
2768
2853
  return ElementCollection(sections)
@@ -3016,6 +3101,54 @@ class Region(
3016
3101
 
3017
3102
  return all_descendants
3018
3103
 
3104
def __add__(
    self, other: Union["Element", "Region", "ElementCollection"]
) -> "ElementCollection":
    """Combine this region with ``other`` into a new ElementCollection.

    Enables the ``+`` operator on regions, e.g.::

        complainant = section.find("text:contains(Complainant)").right(until='text')
        dob = section.find("text:contains(DOB)").right(until='text')
        combined = complainant + dob  # collection holding both regions

    Args:
        other: A single Element or Region, an ElementCollection, or any
            other non-string iterable whose items should follow this region.

    Returns:
        ElementCollection with this region first, followed by ``other``
        (or by the items of ``other`` when it is a collection/iterable).

    Raises:
        TypeError: If ``other`` is a str/bytes or is not iterable and is
            neither an Element nor a Region.
    """
    # Imported inside the method, as the original does.
    from natural_pdf.elements.base import Element
    from natural_pdf.elements.element_collection import ElementCollection

    # Single item: the result is simply the pair.
    if isinstance(other, (Element, Region)):
        return ElementCollection([self, other])

    is_collection = isinstance(other, ElementCollection)
    # Strings and bytes are iterable but are never a sequence of elements.
    is_plain_iterable = hasattr(other, "__iter__") and not isinstance(other, (str, bytes))
    if not (is_collection or is_plain_iterable):
        raise TypeError(f"Cannot add Region with {type(other)}")

    combined = [self]
    combined.extend(other)
    return ElementCollection(combined)
3140
+
3141
def __radd__(
    self, other: Union["Element", "Region", "ElementCollection"]
) -> "ElementCollection":
    """Reflected addition: evaluate ``other + self``.

    Python calls this when the left operand cannot handle the addition
    itself (for example a plain list, or the integer 0 that ``sum()`` uses
    as its start value). Because ``other`` is the LEFT operand, its items
    are placed before this region in the result.

    Args:
        other: The left-hand operand: numeric 0 (from ``sum()``), an Element
            or Region, or a non-string iterable of elements.

    Returns:
        ElementCollection containing the items of ``other`` followed by
        this region; just this region when ``other`` is 0.

    Raises:
        TypeError: If ``other`` is none of the supported kinds.
    """
    from natural_pdf.elements.base import Element
    from natural_pdf.elements.element_collection import ElementCollection

    # sum() seeds the accumulation with 0. Check the type first so that
    # ``==`` is never dispatched to an arbitrary object's __eq__.
    if isinstance(other, (int, float)) and other == 0:
        return ElementCollection([self])

    # Keep the left operand first; delegating to __add__ would put this
    # region in front and silently reverse the order.
    if isinstance(other, (Element, Region)):
        return ElementCollection([other, self])
    if hasattr(other, "__iter__") and not isinstance(other, (str, bytes)):
        return ElementCollection([*other, self])

    raise TypeError(f"Cannot add Region with {type(other)}")
3151
+
3019
3152
  def __repr__(self) -> str:
3020
3153
  """String representation of the region."""
3021
3154
  poly_info = " (Polygon)" if self.has_polygon else ""
natural_pdf/flows/flow.py CHANGED
@@ -1296,6 +1296,7 @@ class Flow(Visualizable):
1296
1296
  end_elements=None,
1297
1297
  new_section_on_page_break: bool = False,
1298
1298
  include_boundaries: str = "both",
1299
+ orientation: str = "vertical",
1299
1300
  ) -> "ElementCollection":
1300
1301
  """
1301
1302
  Extract logical sections from the Flow based on *start* and *end* boundary
@@ -1317,6 +1318,7 @@ class Flow(Visualizable):
1317
1318
  boundaries (default: False).
1318
1319
  include_boundaries: How to include boundary elements: 'start',
1319
1320
  'end', 'both', or 'none' (default: 'both').
1321
+ orientation: 'vertical' (default) or 'horizontal' - determines section direction.
1320
1322
 
1321
1323
  Returns:
1322
1324
  ElementCollection of Region/FlowRegion objects representing the
@@ -1408,6 +1410,7 @@ class Flow(Visualizable):
1408
1410
  start_elements=seg_start_elems,
1409
1411
  end_elements=seg_end_elems,
1410
1412
  include_boundaries=include_boundaries,
1413
+ orientation=orientation,
1411
1414
  )
1412
1415
 
1413
1416
  if seg_sections:
@@ -530,8 +530,8 @@ PSEUDO_CLASS_FUNCTIONS = {
530
530
  "italic": lambda el: hasattr(el, "italic") and el.italic,
531
531
  "first-child": lambda el: hasattr(el, "parent") and el.parent and el.parent.children[0] == el,
532
532
  "last-child": lambda el: hasattr(el, "parent") and el.parent and el.parent.children[-1] == el,
533
- "empty": lambda el: not el.text,
534
- "not-empty": lambda el: el.text,
533
+ "empty": lambda el: not hasattr(el, "text") or not el.text or not el.text.strip(),
534
+ "not-empty": lambda el: bool(hasattr(el, "text") and el.text and el.text.strip()),
535
535
  "not-bold": lambda el: hasattr(el, "bold") and not el.bold,
536
536
  "not-italic": lambda el: hasattr(el, "italic") and not el.italic,
537
537
  }
@@ -0,0 +1,100 @@
1
+ """
2
+ Utility functions for color formatting and conversion.
3
+ """
4
+
5
+ from typing import Any, List, Optional, Tuple, Union
6
+
7
+ # Known color attribute names in natural-pdf; is_color_attribute() compares against these case-insensitively
8
+ COLOR_ATTRIBUTES = [
9
+ "color",
10
+ "fill",
11
+ "stroke",
12
+ "non_stroking_color",
13
+ "stroking_color",
14
+ "text_color",
15
+ "background_color",
16
+ "highlight_color",
17
+ "border_color",
18
+ ]
19
+
20
+
21
def rgb_to_hex(color: Union[Tuple[float, ...], List[float]]) -> str:
    """Convert an RGB/RGBA color sequence to a ``#rrggbb`` hex string.

    Args:
        color: Tuple or list of at least three numbers, either all on the
            [0, 1] scale or all on the [0, 255] scale. A fourth (alpha)
            value, if present, is ignored.

    Returns:
        Lowercase hex color string, e.g. ``'#ff0000'`` for red.

    Raises:
        ValueError: If ``color`` is not a tuple/list or has fewer than
            three components.
    """
    if not isinstance(color, (tuple, list)) or len(color) < 3:
        raise ValueError(f"Invalid color format: {color}")

    # Take the first three values (RGB); alpha is ignored.
    rgb = color[:3]

    # Only a positive component above 1 signals the 0-255 scale. A negative
    # (out-of-range) component is clamped below and must not flip the scale.
    scale = 1 if max(rgb) > 1 else 255

    # Clamp to the valid byte range, then round to the nearest integer.
    # Truncating with int() would render e.g. 0.999 as 'fe' instead of 'ff'.
    channels = [int(round(min(255, max(0, component * scale)))) for component in rgb]

    return "#{:02x}{:02x}{:02x}".format(*channels)
54
+
55
+
56
def is_color_attribute(attr_name: str) -> bool:
    """Report whether ``attr_name`` is one of the known color attributes.

    The comparison ignores case on both sides.

    Args:
        attr_name: Attribute name to look up.

    Returns:
        True when the lowercased name equals the lowercased form of an
        entry in COLOR_ATTRIBUTES, otherwise False.
    """
    needle = attr_name.lower()
    return any(needle == known.lower() for known in COLOR_ATTRIBUTES)
67
+
68
+
69
def format_color_value(value: Any, attr_name: Optional[str] = None) -> str:
    """Format a value for display, rendering RGB(A) tuples as hex colors.

    Args:
        value: The value to format.
        attr_name: Optional attribute name. When given and it is not a
            known color attribute, the value is returned as plain ``str``
            without attempting any color conversion.

    Returns:
        A ``#rrggbb`` string when ``value`` is a tuple/list of 3 or 4 real
        numbers whose first three components lie in [0, 255]; otherwise
        ``str(value)``.
    """
    # A named attribute that is not a color attribute is never converted.
    if attr_name and not is_color_attribute(attr_name):
        return str(value)

    looks_like_color = (
        isinstance(value, (tuple, list))
        and len(value) in (3, 4)
        # bool is a subclass of int, but a tuple of flags is not a color.
        and all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in value)
        # [0, 1] lies inside [0, 255], so one range check covers both scales.
        and all(0 <= v <= 255 for v in value[:3])
    )
    if looks_like_color:
        try:
            return rgb_to_hex(value)
        except (TypeError, ValueError):
            # Conversion is best-effort: fall back to the plain string form.
            pass

    return str(value)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.5
3
+ Version: 0.2.8
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -2,7 +2,7 @@ natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
2
2
  natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
3
3
  natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
4
4
  natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
5
- natural_pdf/analyzers/guides.py,sha256=mLWPPEwywo_FbU3gSoegiRlzxYmkHEo2c4DLX9krH9k,157691
5
+ natural_pdf/analyzers/guides.py,sha256=RHFTc2n6kzKrjsd2pk-1MfG1esuEpnTJr8GrsTqlF3A,160441
6
6
  natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
7
7
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
8
8
  natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
@@ -27,24 +27,24 @@ natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666
27
27
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
28
28
  natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
29
29
  natural_pdf/core/highlighting_service.py,sha256=7on8nErhi50CEH2L4XzGIZ6tIqZtMzmmFlp-2lmwnYE,68856
30
- natural_pdf/core/page.py,sha256=Q3hBvB9KFB8doeXY7YVQt3G1ULdBDfA-0BQD6YPN4oo,144640
31
- natural_pdf/core/page_collection.py,sha256=hEeXs_fzB73XZ8ZkHz2kIuSgBYcVYydvGMMdGuB1rvw,52486
32
- natural_pdf/core/page_groupby.py,sha256=550ME6kd-h-2u75oUIIIqTYsmh8VvdQO1nXXioL8J6A,7378
33
- natural_pdf/core/pdf.py,sha256=VslSn00So6157XfiYbrB9URpx5VlWyshQOt7upi9us4,104248
30
+ natural_pdf/core/page.py,sha256=XmXii652iM-JVKgzpbKQ8f59U0TvDLD5iAfdtx92gis,152675
31
+ natural_pdf/core/page_collection.py,sha256=IjdFq9q0D0P6ZKWInf0H25rLzxfMb7RsUXucogkhNkU,63169
32
+ natural_pdf/core/page_groupby.py,sha256=V2e_RNlHaasUzYm2h2vNJI7_aV_fl3_pg7kU3F2j0z8,8218
33
+ natural_pdf/core/pdf.py,sha256=ovdeu9TRPnVYyMltD7QpcdcFYBLZFXh3LlfC5ifj6RY,104227
34
34
  natural_pdf/core/pdf_collection.py,sha256=s3ogu4CEHrHMTRqQMJUKJZ-9Ii8b_B9dWbVLTFj0s7g,34992
35
- natural_pdf/core/render_spec.py,sha256=rLicaS9EPyojpJcjy2Lzn5DLWQwjrFyDJyRo7jbjdGU,14505
35
+ natural_pdf/core/render_spec.py,sha256=y9QkMiIvWaKiEBlV0TjyldADIEUY3YfWLQXxStHu1S4,15480
36
36
  natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
37
- natural_pdf/describe/base.py,sha256=Of9WVo9XuShXoeyJr0RN2CpLhF_CeiOjazl-or53RKU,18173
38
- natural_pdf/describe/elements.py,sha256=JicXC9SJmmasqxalpCXA47-kVwv-6JnR3Xiu778aNHM,12634
37
+ natural_pdf/describe/base.py,sha256=LYbDjjQYOIZsYBbBQH3UP6XyWArJJvRc8LUugeVdJw0,18178
38
+ natural_pdf/describe/elements.py,sha256=3Y541z5TQ2obrfZFiFi1YQMsCt3oYrhMHpD5j1tuppw,12639
39
39
  natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
40
40
  natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
41
41
  natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
42
- natural_pdf/elements/base.py,sha256=aj-eXOQQlhKv9lYeUlUs9aKNcUebtG_dqxURZHZVZ58,55509
43
- natural_pdf/elements/element_collection.py,sha256=slCUnOT04sNOTjSGgmhjcCKKPVPtdDPwU7PX1ebzGMw,101342
42
+ natural_pdf/elements/base.py,sha256=92ukTtRCQFsa5KvKflChCt4mt0ZGS4ecGYCQTNMO4zU,58907
43
+ natural_pdf/elements/element_collection.py,sha256=-piFQGiDPiqmnl-Cpoi3PGPmGe4AYvpl0IqaJGxBsBc,129405
44
44
  natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
45
45
  natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
46
46
  natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
47
- natural_pdf/elements/region.py,sha256=_NNBewHlyUHvA4g9kApilP6it0cn2IRlcGG4r993oUI,156660
47
+ natural_pdf/elements/region.py,sha256=XLbaMEQ-DXzbh4Xnv72ebS1ZlT5EuWpistz0O6bOSag,162583
48
48
  natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
49
49
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
50
50
  natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
@@ -63,7 +63,7 @@ natural_pdf/extraction/result.py,sha256=PDaCCN2LQBbHsZy0_lrQ0ROeMsnmH1WRoXWOjk9M
63
63
  natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU,277
64
64
  natural_pdf/flows/collections.py,sha256=ErkHWdX6W_y1SjkcA_bGM0uUYRGPWWpRkHip6LHpej0,25740
65
65
  natural_pdf/flows/element.py,sha256=T-9uXsIBe7mIim-mQQMep6Ja5dRfWaYIj8g1ak_Bv8c,24892
66
- natural_pdf/flows/flow.py,sha256=If4G0feMsO3jZO5T2YXvGhb2UygMgwK3567JD42bSl0,85975
66
+ natural_pdf/flows/flow.py,sha256=BuT3DBqNvLEqYle66-nZFO91i_1s98CAat28Dg-JjGU,86149
67
67
  natural_pdf/flows/region.py,sha256=r_cFtBlmPi7ADN3k8oYA1s_vyz8GeQLCnYcv58Zt5eM,52263
68
68
  natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
69
69
  natural_pdf/ocr/engine.py,sha256=SwNlWydtHbrIghV5JD_j5B4-rnjCMYIWUIEARag-zHw,11839
@@ -85,12 +85,13 @@ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzP
85
85
  natural_pdf/search/search_service_protocol.py,sha256=u8pbuWP96fnQEe6mnreY9DrdiDAHP6ZCY7phvSbFlP8,6697
86
86
  natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1M1VW9Il8U,23514
87
87
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
88
- natural_pdf/selectors/parser.py,sha256=pw0M8ICKPMOzZPzWpLsQMG_lnl8PewGIdIG3ciukabk,38877
88
+ natural_pdf/selectors/parser.py,sha256=yV5Eb0VyNZocoYIXi7SMKsf8o66vrGNb-MeT27aEj-M,38977
89
89
  natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
90
90
  natural_pdf/tables/result.py,sha256=-8ctA-jCJYSHtlfAoqTvhUwO5zSP2BQxxetAjqEsNyg,8665
91
91
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
92
92
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
93
93
  natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
94
+ natural_pdf/utils/color_utils.py,sha256=6v2hqk4WdaUR85fFbOej_T4AeASpVeze3xVRWgNKqOk,3128
94
95
  natural_pdf/utils/debug.py,sha256=Epwie_jmRgknUSaEoxEyvr1lBXpfYTFOe2UQh_zSj_0,1026
95
96
  natural_pdf/utils/highlighting.py,sha256=c9SvvPaJDI9bWXzq1A7zdh_0s3C4GCMngrJdkL2AMeM,719
96
97
  natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
@@ -106,7 +107,7 @@ natural_pdf/vision/results.py,sha256=F2zXG3MVZIpOUvPkJHotOq6-9rFz68BaO_8pnSndlOs
106
107
  natural_pdf/vision/similarity.py,sha256=YH8legN-t9uf1b_XULi4JLNDaRfPNKQwU1FZ4Qu08jY,11740
107
108
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
108
109
  natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
109
- natural_pdf-0.2.5.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
110
+ natural_pdf-0.2.8.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
110
111
  optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
111
112
  optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
112
113
  optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
@@ -123,8 +124,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
123
124
  tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
124
125
  tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
125
126
  tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
126
- natural_pdf-0.2.5.dist-info/METADATA,sha256=H9nhjh1zRBmz2vUTe_j6FT-Zvn1sgoWT0nyoZG5GTYg,6959
127
- natural_pdf-0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
128
- natural_pdf-0.2.5.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
129
- natural_pdf-0.2.5.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
130
- natural_pdf-0.2.5.dist-info/RECORD,,
127
+ natural_pdf-0.2.8.dist-info/METADATA,sha256=tuWXV-mY9zU0qsVsXhrrp3aGBfSxlklUxS_Dlllqmp4,6959
128
+ natural_pdf-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
129
+ natural_pdf-0.2.8.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
130
+ natural_pdf-0.2.8.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
131
+ natural_pdf-0.2.8.dist-info/RECORD,,