natural-pdf 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,8 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Uni
6
6
 
7
7
  from PIL import Image
8
8
 
9
+ # Import global options
10
+ import natural_pdf
9
11
  from natural_pdf.classification.mixin import ClassificationMixin
10
12
  from natural_pdf.core.render_spec import RenderSpec, Visualizable
11
13
  from natural_pdf.describe.mixin import DescribeMixin
@@ -18,6 +20,7 @@ if TYPE_CHECKING:
18
20
  from natural_pdf.core.page import Page
19
21
  from natural_pdf.elements.element_collection import ElementCollection
20
22
  from natural_pdf.elements.region import Region
23
+ from natural_pdf.flows.region import FlowRegion
21
24
 
22
25
 
23
26
  def extract_bbox(obj: Any) -> Optional[Tuple[float, float, float, float]]:
@@ -93,6 +96,16 @@ class DirectionalMixin:
93
96
  - above(): Create region above
94
97
  - below(): Create region below
95
98
 
99
+ Smart defaults:
100
+ - left() and right() default to element height
101
+ - above() and below() default to full page width
102
+ - All methods use a small offset (default 0.01 points) to avoid character overlap
103
+
104
+ Global offset configuration:
105
+ The default offset can be changed globally:
106
+ import natural_pdf as npdf
107
+ npdf.options.layout.directional_offset = 0.05 # Change to 0.05 points
108
+
96
109
  Note:
97
110
  This mixin requires the implementing class to have 'page', 'x0', 'top',
98
111
  'x1', and 'bottom' attributes for coordinate calculations.
@@ -107,8 +120,10 @@ class DirectionalMixin:
107
120
  until: Optional[str] = None,
108
121
  include_endpoint: bool = True,
109
122
  offset: float = 0.0,
123
+ apply_exclusions: bool = True,
124
+ multipage: bool = False,
110
125
  **kwargs,
111
- ) -> "Region":
126
+ ) -> Union["Region", "FlowRegion"]:
112
127
  """
113
128
  Protected helper method to create a region in a specified direction relative to this element/region.
114
129
 
@@ -119,7 +134,8 @@ class DirectionalMixin:
119
134
  include_source: Whether to include this element/region's area in the result
120
135
  until: Optional selector string to specify a boundary element
121
136
  include_endpoint: Whether to include the boundary element found by 'until'
122
- offset: Pixel offset when excluding source/endpoint (default: 0.1)
137
+ offset: Offset in points when excluding source/endpoint (default: 0.0; the public above()/below()/left()/right() methods pass natural_pdf.options.layout.directional_offset when their own offset is None)
138
+ apply_exclusions: Whether to respect exclusions when using 'until' selector (default: True)
123
139
  **kwargs: Additional parameters for the 'until' selector search
124
140
 
125
141
  Returns:
@@ -189,21 +205,46 @@ class DirectionalMixin:
189
205
  # Only take ones on the same page
190
206
  all_matches = [m for m in until if m.page == self.page]
191
207
  else:
192
- all_matches = self.page.find_all(until, **kwargs)
208
+ all_matches = self.page.find_all(until, apply_exclusions=apply_exclusions, **kwargs)
193
209
  matches_in_direction = []
194
210
 
195
211
  # Filter and sort matches based on direction
212
+ # Also filter by cross-direction bounds when cross_size='element'
196
213
  if direction == "above":
197
214
  matches_in_direction = [m for m in all_matches if m.bottom <= self.top]
215
+ # Filter by horizontal bounds if cross_size='element'
216
+ if cross_size == "element":
217
+ matches_in_direction = [
218
+ m for m in matches_in_direction if m.x0 < self.x1 and m.x1 > self.x0
219
+ ]
198
220
  matches_in_direction.sort(key=lambda e: e.bottom, reverse=True)
199
221
  elif direction == "below":
200
222
  matches_in_direction = [m for m in all_matches if m.top >= self.bottom]
223
+ # Filter by horizontal bounds if cross_size='element'
224
+ if cross_size == "element":
225
+ matches_in_direction = [
226
+ m for m in matches_in_direction if m.x0 < self.x1 and m.x1 > self.x0
227
+ ]
201
228
  matches_in_direction.sort(key=lambda e: e.top)
202
229
  elif direction == "left":
203
230
  matches_in_direction = [m for m in all_matches if m.x1 <= self.x0]
231
+ # Filter by vertical bounds if cross_size='element'
232
+ if cross_size == "element":
233
+ matches_in_direction = [
234
+ m
235
+ for m in matches_in_direction
236
+ if m.top < self.bottom and m.bottom > self.top
237
+ ]
204
238
  matches_in_direction.sort(key=lambda e: e.x1, reverse=True)
205
239
  elif direction == "right":
206
240
  matches_in_direction = [m for m in all_matches if m.x0 >= self.x1]
241
+ # Filter by vertical bounds if cross_size='element'
242
+ if cross_size == "element":
243
+ matches_in_direction = [
244
+ m
245
+ for m in matches_in_direction
246
+ if m.top < self.bottom and m.bottom > self.top
247
+ ]
207
248
  matches_in_direction.sort(key=lambda e: e.x0)
208
249
 
209
250
  if matches_in_direction:
@@ -243,7 +284,51 @@ class DirectionalMixin:
243
284
  final_y1 = max(bbox[1], bbox[3])
244
285
  final_bbox = (final_x0, final_y0, final_x1, final_y1)
245
286
 
246
- # 5. Create and return appropriate object based on self type
287
+ # 5. Check if multipage is needed
288
+ # Start from the caller's value; the global auto_multipage option can only switch multipage on
289
+ use_multipage = multipage
290
+ # If multipage is False but auto_multipage is True, use True
291
+ if not multipage and natural_pdf.options.layout.auto_multipage:
292
+ use_multipage = True
293
+
294
+ # Prevent recursion: if called with internal flag, don't use multipage
295
+ if kwargs.get("_from_flow", False):
296
+ use_multipage = False
297
+
298
+ if use_multipage:
299
+ # Check if we need to cross page boundaries
300
+ needs_multipage = False
301
+
302
+ # Case 1: until was specified but target not found on current page
303
+ if until and not target:
304
+ needs_multipage = True
305
+
306
+ # Case 2: size extends beyond page boundaries
307
+ if not until:
308
+ if direction == "below" and final_bbox[3] >= self.page.height:
309
+ needs_multipage = True
310
+ elif direction == "above" and final_bbox[1] <= 0:
311
+ needs_multipage = True
312
+ elif direction == "right" and final_bbox[2] >= self.page.width:
313
+ needs_multipage = True
314
+ elif direction == "left" and final_bbox[0] <= 0:
315
+ needs_multipage = True
316
+
317
+ if needs_multipage:
318
+ # Use multipage implementation
319
+ return self._direction_multipage(
320
+ direction=direction,
321
+ size=size,
322
+ cross_size=cross_size,
323
+ include_source=include_source,
324
+ until=until,
325
+ include_endpoint=include_endpoint,
326
+ offset=offset,
327
+ apply_exclusions=apply_exclusions,
328
+ **kwargs,
329
+ )
330
+
331
+ # 6. Create and return appropriate object based on self type
247
332
  from natural_pdf.elements.region import Region
248
333
 
249
334
  result = Region(self.page, final_bbox)
@@ -255,6 +340,144 @@ class DirectionalMixin:
255
340
 
256
341
  return result
257
342
 
343
+ def _direction_multipage(
344
+ self,
345
+ direction: str,
346
+ size: Optional[float] = None,
347
+ cross_size: str = "full",
348
+ include_source: bool = False,
349
+ until: Optional[str] = None,
350
+ include_endpoint: bool = True,
351
+ offset: float = 0.0,
352
+ apply_exclusions: bool = True,
353
+ **kwargs,
354
+ ) -> Union["Region", "FlowRegion"]:
355
+ """
356
+ Handle multipage directional navigation by creating a Flow.
357
+
358
+ Returns FlowRegion if result spans multiple pages, Region if on single page.
359
+ """
360
+ # Get access to the PDF to create a Flow
361
+ pdf = self.page.pdf
362
+ # Find the index of the current page
363
+ current_page_idx = None
364
+ for idx, page in enumerate(pdf.pages):
365
+ if page == self.page:
366
+ current_page_idx = idx
367
+ break
368
+
369
+ if current_page_idx is None:
370
+ # Fallback - just use current page
371
+ from natural_pdf.flows.flow import Flow
372
+
373
+ flow = Flow(segments=[self.page], arrangement="vertical")
374
+ from natural_pdf.flows.element import FlowElement
375
+
376
+ flow_element = FlowElement(physical_object=self, flow=flow)
377
+ return getattr(flow_element, direction)(**kwargs)
378
+
379
+ # Determine which pages to include in the Flow based on direction
380
+ if direction in ("below", "right"):
381
+ # Include current page and all following pages
382
+ flow_pages = pdf.pages[current_page_idx:]
383
+ else: # above, left
384
+ # Include all pages up to and including current page
385
+ flow_pages = pdf.pages[: current_page_idx + 1]
386
+
387
+ # Create a temporary Flow
388
+ from natural_pdf.flows.flow import Flow
389
+
390
+ flow = Flow(segments=list(flow_pages), arrangement="vertical")
391
+
392
+ # Find the element in the flow
393
+ # We need to create a FlowElement that corresponds to self
394
+ from natural_pdf.flows.element import FlowElement
395
+
396
+ flow_element = FlowElement(physical_object=self, flow=flow)
397
+
398
+ # Call the directional method on the FlowElement
399
+ # Remove parameters that FlowElement methods don't expect
400
+ flow_kwargs = kwargs.copy()
401
+ flow_kwargs.pop("multipage", None) # Remove multipage parameter
402
+ flow_kwargs.pop("apply_exclusions", None) # FlowElement might not have this
403
+ flow_kwargs.pop("offset", None) # FlowElement doesn't have offset
404
+ flow_kwargs.pop("cross_alignment", None) # Remove to avoid duplicate
405
+
406
+ # Map cross_size to appropriate FlowElement parameter
407
+ if direction in ["below", "above"]:
408
+ # For vertical directions, cross_size maps to width parameters
409
+ if cross_size == "full":
410
+ width_absolute = None # Let FlowElement use its defaults
411
+ elif cross_size == "element":
412
+ width_absolute = self.width
413
+ elif isinstance(cross_size, (int, float)):
414
+ width_absolute = cross_size
415
+ else:
416
+ width_absolute = None
417
+
418
+ result = (
419
+ flow_element.below(
420
+ height=size,
421
+ width_absolute=width_absolute,
422
+ include_source=include_source,
423
+ until=until,
424
+ include_endpoint=include_endpoint,
425
+ **flow_kwargs,
426
+ )
427
+ if direction == "below"
428
+ else flow_element.above(
429
+ height=size,
430
+ width_absolute=width_absolute,
431
+ include_source=include_source,
432
+ until=until,
433
+ include_endpoint=include_endpoint,
434
+ **flow_kwargs,
435
+ )
436
+ )
437
+ else: # left, right
438
+ # For horizontal directions, cross_size maps to height parameters
439
+ if cross_size == "full":
440
+ height_absolute = None # Let FlowElement use its defaults
441
+ elif cross_size == "element":
442
+ height_absolute = self.height
443
+ elif isinstance(cross_size, (int, float)):
444
+ height_absolute = cross_size
445
+ else:
446
+ height_absolute = None
447
+
448
+ result = (
449
+ flow_element.left(
450
+ width=size,
451
+ height_absolute=height_absolute,
452
+ include_source=include_source,
453
+ until=until,
454
+ include_endpoint=include_endpoint,
455
+ **flow_kwargs,
456
+ )
457
+ if direction == "left"
458
+ else flow_element.right(
459
+ width=size,
460
+ height_absolute=height_absolute,
461
+ include_source=include_source,
462
+ until=until,
463
+ include_endpoint=include_endpoint,
464
+ **flow_kwargs,
465
+ )
466
+ )
467
+
468
+ # If the result is a FlowRegion with only one constituent region,
469
+ # return that Region instead
470
+ from natural_pdf.flows.region import FlowRegion
471
+
472
+ if isinstance(result, FlowRegion) and len(result.constituent_regions) == 1:
473
+ single_region = result.constituent_regions[0]
474
+ # Copy over any metadata
475
+ if hasattr(result, "boundary_element_found"):
476
+ single_region.boundary_element = result.boundary_element_found
477
+ return single_region
478
+
479
+ return result
480
+
258
481
  def above(
259
482
  self,
260
483
  height: Optional[float] = None,
@@ -262,9 +485,11 @@ class DirectionalMixin:
262
485
  include_source: bool = False,
263
486
  until: Optional[str] = None,
264
487
  include_endpoint: bool = True,
265
- offset: float = 0.1,
488
+ offset: Optional[float] = None,
489
+ apply_exclusions: bool = True,
490
+ multipage: bool = False,
266
491
  **kwargs,
267
- ) -> "Region":
492
+ ) -> Union["Region", "FlowRegion"]:
268
493
  """
269
494
  Select region above this element/region.
270
495
 
@@ -274,7 +499,10 @@ class DirectionalMixin:
274
499
  include_source: Whether to include this element/region in the result (default: False)
275
500
  until: Optional selector string to specify an upper boundary element
276
501
  include_endpoint: Whether to include the boundary element in the region (default: True)
277
- offset: Pixel offset when excluding source/endpoint (default: 0.1)
502
+ offset: Offset in points when excluding source/endpoint (default: None, which uses natural_pdf.options.layout.directional_offset)
503
+ apply_exclusions: Whether to respect exclusions when using 'until' selector (default: True)
504
+ multipage: If True, allows the region to span multiple pages. Returns FlowRegion
505
+ if the result spans multiple pages, Region otherwise (default: False)
278
506
  **kwargs: Additional parameters
279
507
 
280
508
  Returns:
@@ -292,6 +520,10 @@ class DirectionalMixin:
292
520
  signature.above(until='text:contains("Date")') # Region from date to signature
293
521
  ```
294
522
  """
523
+ # Use global default if offset not provided
524
+ if offset is None:
525
+ offset = natural_pdf.options.layout.directional_offset
526
+
295
527
  return self._direction(
296
528
  direction="above",
297
529
  size=height,
@@ -300,6 +532,8 @@ class DirectionalMixin:
300
532
  until=until,
301
533
  include_endpoint=include_endpoint,
302
534
  offset=offset,
535
+ apply_exclusions=apply_exclusions,
536
+ multipage=multipage,
303
537
  **kwargs,
304
538
  )
305
539
 
@@ -310,9 +544,11 @@ class DirectionalMixin:
310
544
  include_source: bool = False,
311
545
  until: Optional[str] = None,
312
546
  include_endpoint: bool = True,
313
- offset: float = 0.1,
547
+ offset: Optional[float] = None,
548
+ apply_exclusions: bool = True,
549
+ multipage: bool = False,
314
550
  **kwargs,
315
- ) -> "Region":
551
+ ) -> Union["Region", "FlowRegion"]:
316
552
  """
317
553
  Select region below this element/region.
318
554
 
@@ -322,7 +558,10 @@ class DirectionalMixin:
322
558
  include_source: Whether to include this element/region in the result (default: False)
323
559
  until: Optional selector string to specify a lower boundary element
324
560
  include_endpoint: Whether to include the boundary element in the region (default: True)
325
- offset: Pixel offset when excluding source/endpoint (default: 0.1)
561
+ multipage: If True, allows the region to span multiple pages. Returns FlowRegion
562
+ if the result spans multiple pages, Region otherwise (default: False)
563
+ offset: Offset in points when excluding source/endpoint (default: None, which uses natural_pdf.options.layout.directional_offset)
564
+ apply_exclusions: Whether to respect exclusions when using 'until' selector (default: True)
326
565
  **kwargs: Additional parameters
327
566
 
328
567
  Returns:
@@ -340,6 +579,10 @@ class DirectionalMixin:
340
579
  header.below(height=200) # Gets 200pt tall region below header
341
580
  ```
342
581
  """
582
+ # Use global default if offset not provided
583
+ if offset is None:
584
+ offset = natural_pdf.options.layout.directional_offset
585
+
343
586
  return self._direction(
344
587
  direction="below",
345
588
  size=height,
@@ -348,6 +591,8 @@ class DirectionalMixin:
348
591
  until=until,
349
592
  include_endpoint=include_endpoint,
350
593
  offset=offset,
594
+ apply_exclusions=apply_exclusions,
595
+ multipage=multipage,
351
596
  **kwargs,
352
597
  )
353
598
 
@@ -358,9 +603,11 @@ class DirectionalMixin:
358
603
  include_source: bool = False,
359
604
  until: Optional[str] = None,
360
605
  include_endpoint: bool = True,
361
- offset: float = 0.1,
606
+ offset: Optional[float] = None,
607
+ apply_exclusions: bool = True,
608
+ multipage: bool = False,
362
609
  **kwargs,
363
- ) -> "Region":
610
+ ) -> Union["Region", "FlowRegion"]:
364
611
  """
365
612
  Select region to the left of this element/region.
366
613
 
@@ -370,7 +617,10 @@ class DirectionalMixin:
370
617
  include_source: Whether to include this element/region in the result (default: False)
371
618
  until: Optional selector string to specify a left boundary element
372
619
  include_endpoint: Whether to include the boundary element in the region (default: True)
373
- offset: Pixel offset when excluding source/endpoint (default: 0.1)
620
+ offset: Offset in points when excluding source/endpoint (default: None, which uses natural_pdf.options.layout.directional_offset)
621
+ apply_exclusions: Whether to respect exclusions when using 'until' selector (default: True)
622
+ multipage: If True, allows the region to span multiple pages. Returns FlowRegion
623
+ if the result spans multiple pages, Region otherwise (default: False)
374
624
  **kwargs: Additional parameters
375
625
 
376
626
  Returns:
@@ -388,6 +638,10 @@ class DirectionalMixin:
388
638
  table.left(height=100) # Gets 100pt tall region to the left
389
639
  ```
390
640
  """
641
+ # Use global default if offset not provided
642
+ if offset is None:
643
+ offset = natural_pdf.options.layout.directional_offset
644
+
391
645
  return self._direction(
392
646
  direction="left",
393
647
  size=width,
@@ -396,6 +650,8 @@ class DirectionalMixin:
396
650
  until=until,
397
651
  include_endpoint=include_endpoint,
398
652
  offset=offset,
653
+ apply_exclusions=apply_exclusions,
654
+ multipage=multipage,
399
655
  **kwargs,
400
656
  )
401
657
 
@@ -406,9 +662,11 @@ class DirectionalMixin:
406
662
  include_source: bool = False,
407
663
  until: Optional[str] = None,
408
664
  include_endpoint: bool = True,
409
- offset: float = 0.1,
665
+ offset: Optional[float] = None,
666
+ apply_exclusions: bool = True,
667
+ multipage: bool = False,
410
668
  **kwargs,
411
- ) -> "Region":
669
+ ) -> Union["Region", "FlowRegion"]:
412
670
  """
413
671
  Select region to the right of this element/region.
414
672
 
@@ -418,7 +676,10 @@ class DirectionalMixin:
418
676
  include_source: Whether to include this element/region in the result (default: False)
419
677
  until: Optional selector string to specify a right boundary element
420
678
  include_endpoint: Whether to include the boundary element in the region (default: True)
421
- offset: Pixel offset when excluding source/endpoint (default: 0.1)
679
+ offset: Offset in points when excluding source/endpoint (default: None, which uses natural_pdf.options.layout.directional_offset)
680
+ apply_exclusions: Whether to respect exclusions when using 'until' selector (default: True)
681
+ multipage: If True, allows the region to span multiple pages. Returns FlowRegion
682
+ if the result spans multiple pages, Region otherwise (default: False)
422
683
  **kwargs: Additional parameters
423
684
 
424
685
  Returns:
@@ -436,6 +697,10 @@ class DirectionalMixin:
436
697
  label.right(height=50) # Gets 50pt tall region to the right
437
698
  ```
438
699
  """
700
+ # Use global default if offset not provided
701
+ if offset is None:
702
+ offset = natural_pdf.options.layout.directional_offset
703
+
439
704
  return self._direction(
440
705
  direction="right",
441
706
  size=width,
@@ -444,6 +709,8 @@ class DirectionalMixin:
444
709
  until=until,
445
710
  include_endpoint=include_endpoint,
446
711
  offset=offset,
712
+ apply_exclusions=apply_exclusions,
713
+ multipage=multipage,
447
714
  **kwargs,
448
715
  )
449
716
 
@@ -451,7 +718,7 @@ class DirectionalMixin:
451
718
  return self.expand()
452
719
 
453
720
  @overload
454
- def expand(self, amount: float) -> "Region":
721
+ def expand(self, amount: float, *, apply_exclusions: bool = True) -> "Region":
455
722
  """Expand in all directions by the same amount."""
456
723
  ...
457
724
 
@@ -459,12 +726,13 @@ class DirectionalMixin:
459
726
  def expand(
460
727
  self,
461
728
  *,
462
- left: float = 0,
463
- right: float = 0,
464
- top: float = 0,
465
- bottom: float = 0,
729
+ left: Union[float, bool, str] = 0,
730
+ right: Union[float, bool, str] = 0,
731
+ top: Union[float, bool, str] = 0,
732
+ bottom: Union[float, bool, str] = 0,
466
733
  width_factor: float = 1.0,
467
734
  height_factor: float = 1.0,
735
+ apply_exclusions: bool = True,
468
736
  ) -> "Region":
469
737
  """Expand by different amounts in each direction."""
470
738
  ...
@@ -472,24 +740,29 @@ class DirectionalMixin:
472
740
  def expand(
473
741
  self,
474
742
  amount: Optional[float] = None,
475
- left: float = 0,
476
- right: float = 0,
477
- top: float = 0,
478
- bottom: float = 0,
743
+ left: Union[float, bool, str] = 0,
744
+ right: Union[float, bool, str] = 0,
745
+ top: Union[float, bool, str] = 0,
746
+ bottom: Union[float, bool, str] = 0,
479
747
  width_factor: float = 1.0,
480
748
  height_factor: float = 1.0,
749
+ apply_exclusions: bool = True,
481
750
  ) -> "Region":
482
751
  """
483
752
  Create a new region expanded from this element/region.
484
753
 
485
754
  Args:
486
755
  amount: If provided as the first positional argument, expand all edges by this amount
487
- left: Amount to expand left edge (positive value expands leftwards)
488
- right: Amount to expand right edge (positive value expands rightwards)
489
- top: Amount to expand top edge (positive value expands upwards)
490
- bottom: Amount to expand bottom edge (positive value expands downwards)
756
+ left: Amount to expand left edge:
757
+ - float: Fixed pixel expansion
758
+ - True: Expand to page edge
759
+ - str: Selector to expand until (excludes target by default, prefix with '+' to include)
760
+ right: Amount to expand right edge (same options as left)
761
+ top: Amount to expand top edge (same options as left)
762
+ bottom: Amount to expand bottom edge (same options as left)
491
763
  width_factor: Factor to multiply width by (applied after absolute expansion)
492
764
  height_factor: Factor to multiply height by (applied after absolute expansion)
765
+ apply_exclusions: Whether to respect exclusions when using selectors (default: True)
493
766
 
494
767
  Returns:
495
768
  New expanded Region object
@@ -501,31 +774,108 @@ class DirectionalMixin:
501
774
  # Expand by different amounts in each direction
502
775
  expanded = element.expand(left=10, right=5, top=3, bottom=7)
503
776
 
777
+ # Expand to page edges
778
+ expanded = element.expand(left=True, right=True) # Full width
779
+
780
+ # Expand until specific elements
781
+ statute = page.find('text:contains("Statute")')
782
+ expanded = statute.expand(right='text:contains("Repeat?")') # Excludes "Repeat?"
783
+ expanded = statute.expand(right='+text:contains("Repeat?")') # Includes "Repeat?"
784
+
504
785
  # Use width/height factors
505
786
  expanded = element.expand(width_factor=1.5, height_factor=2.0)
506
787
  """
507
788
  # If amount is provided as first positional argument, use it for all directions
508
789
  if amount is not None:
509
790
  left = right = top = bottom = amount
510
- # Start with current coordinates
511
- new_x0 = self.x0
512
- new_x1 = self.x1
513
- new_top = self.top
514
- new_bottom = self.bottom
515
-
516
- # Apply absolute expansions first
517
- new_x0 -= left
518
- new_x1 += right
519
- new_top -= top # Expand upward (decrease top coordinate)
520
- new_bottom += bottom # Expand downward (increase bottom coordinate)
791
+
792
+ # Helper function to process expansion values
793
+ def process_expansion(value, direction):
794
+ """Process expansion value and return the new coordinate."""
795
+ is_horizontal = direction in ("left", "right")
796
+ is_positive = direction in ("right", "bottom")
797
+
798
+ # Get current bounds
799
+ if is_horizontal:
800
+ current_edge = self.x1 if is_positive else self.x0
801
+ page_limit = self.page.width if is_positive else 0
802
+ else:
803
+ current_edge = self.bottom if is_positive else self.top
804
+ page_limit = self.page.height if is_positive else 0
805
+
806
+ # Handle boolean True - expand to page edge
807
+ if value is True:
808
+ return page_limit
809
+
810
+ # Handle numeric values - fixed pixel expansion
811
+ elif isinstance(value, (int, float)):
812
+ if is_positive:
813
+ return current_edge + value
814
+ else:
815
+ return current_edge - value
816
+
817
+ # Handle string selectors - use directional methods
818
+ elif isinstance(value, str):
819
+ # Check if we should include the endpoint
820
+ include_endpoint = value.startswith("+")
821
+ selector = value[1:] if include_endpoint else value
822
+
823
+ # Use directional methods to get the region
824
+ if direction == "left":
825
+ region = self.left(
826
+ until=selector,
827
+ include_endpoint=include_endpoint,
828
+ include_source=True,
829
+ apply_exclusions=apply_exclusions,
830
+ )
831
+ return region.x0
832
+ elif direction == "right":
833
+ region = self.right(
834
+ until=selector,
835
+ include_endpoint=include_endpoint,
836
+ include_source=True,
837
+ apply_exclusions=apply_exclusions,
838
+ )
839
+ return region.x1
840
+ elif direction == "top":
841
+ region = self.above(
842
+ until=selector,
843
+ include_endpoint=include_endpoint,
844
+ include_source=True,
845
+ width="element",
846
+ apply_exclusions=apply_exclusions,
847
+ )
848
+ return region.top
849
+ elif direction == "bottom":
850
+ region = self.below(
851
+ until=selector,
852
+ include_endpoint=include_endpoint,
853
+ include_source=True,
854
+ width="element",
855
+ apply_exclusions=apply_exclusions,
856
+ )
857
+ return region.bottom
858
+
859
+ # Should not reach here
860
+ return current_edge
861
+
862
+ else:
863
+ # Invalid value type, return current edge
864
+ return current_edge
865
+
866
+ # Process each direction
867
+ new_x0 = process_expansion(left, "left") if left else self.x0
868
+ new_x1 = process_expansion(right, "right") if right else self.x1
869
+ new_top = process_expansion(top, "top") if top else self.top
870
+ new_bottom = process_expansion(bottom, "bottom") if bottom else self.bottom
521
871
 
522
872
  # Apply percentage factors if provided
523
873
  if width_factor != 1.0 or height_factor != 1.0:
524
- # Calculate center point *after* absolute expansion
874
+ # Calculate center point *after* expansion
525
875
  center_x = (new_x0 + new_x1) / 2
526
876
  center_y = (new_top + new_bottom) / 2
527
877
 
528
- # Calculate current width and height *after* absolute expansion
878
+ # Calculate current width and height *after* expansion
529
879
  current_width = new_x1 - new_x0
530
880
  current_height = new_bottom - new_top
531
881
 
@@ -1210,7 +1560,22 @@ class Element(
1210
1560
  return self
1211
1561
 
1212
1562
  def exclude(self):
1213
- self.page.add_exclusion(self)
1563
+ """
1564
+ Exclude this element from text extraction and other operations.
1565
+
1566
+ For Region elements, this excludes everything within the region's bounds.
1567
+ For other elements (like TextElement), this excludes only the specific element,
1568
+ not the entire area it occupies.
1569
+ """
1570
+ from natural_pdf.elements.region import Region
1571
+
1572
+ # Use 'region' method for Region objects, 'element' method for everything else
1573
+ if isinstance(self, Region):
1574
+ method = "region"
1575
+ else:
1576
+ method = "element"
1577
+
1578
+ self.page.add_exclusion(self, method=method)
1214
1579
 
1215
1580
  def _get_render_specs(
1216
1581
  self,