natural-pdf 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -460,6 +460,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
460
460
  end_elements=None,
461
461
  new_section_on_page_break=False,
462
462
  include_boundaries="both",
463
+ orientation="vertical",
463
464
  ) -> "ElementCollection[Region]":
464
465
  """
465
466
  Extract sections from a page collection based on start/end elements.
@@ -469,6 +470,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
469
470
  end_elements: Elements or selector string that mark the end of sections (optional)
470
471
  new_section_on_page_break: Whether to start a new section at page boundaries (default: False)
471
472
  include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none' (default: 'both')
473
+ orientation: 'vertical' (default) or 'horizontal' - determines section direction
472
474
 
473
475
  Returns:
474
476
  List of Region objects representing the extracted sections
@@ -511,6 +513,9 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
511
513
  next_page = self.pages[i + 1]
512
514
  top_region = Region(next_page, (0, 0, next_page.width, 1))
513
515
  top_region.is_page_boundary = True # Mark it as a special boundary
516
+ # If start_elements is None, initialize it as an empty list
517
+ if start_elements is None:
518
+ start_elements = []
514
519
  start_elements.append(top_region)
515
520
 
516
521
  # Get all elements from all pages and sort them in document order
@@ -532,16 +537,23 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
532
537
  first_page = self.pages[0]
533
538
  first_start = Region(first_page, (0, 0, first_page.width, 1))
534
539
  first_start.is_implicit_start = True
540
+ # Don't mark this as created from any end element, so it can pair with any end
535
541
  start_elements.append(first_start)
536
542
 
537
543
  # For each end element (except the last), add an implicit start after it
538
- sorted_end_elements = sorted(end_elements, key=lambda e: (e.page.index, e.top, e.x0))
544
+ # Sort by page, then top, then bottom (for elements with same top), then x0
545
+ sorted_end_elements = sorted(
546
+ end_elements, key=lambda e: (e.page.index, e.top, e.bottom, e.x0)
547
+ )
539
548
  for i, end_elem in enumerate(sorted_end_elements[:-1]): # Exclude last end element
540
549
  # Create implicit start element right after this end element
541
550
  implicit_start = Region(
542
551
  end_elem.page, (0, end_elem.bottom, end_elem.page.width, end_elem.bottom + 1)
543
552
  )
544
553
  implicit_start.is_implicit_start = True
554
+ # Track which end element this implicit start was created from
555
+ # to avoid pairing them together (which would create zero height)
556
+ implicit_start.created_from_end = end_elem
545
557
  start_elements.append(implicit_start)
546
558
 
547
559
  # Mark section boundaries
@@ -606,17 +618,20 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
606
618
 
607
619
  # Sort boundaries by page index, then by actual document position
608
620
  def _sort_key(boundary):
609
- """Sort boundaries by (page_idx, vertical_top, priority)."""
621
+ """Sort boundaries by (page_idx, position, priority)."""
610
622
  page_idx = boundary["page_idx"]
611
623
  element = boundary["element"]
612
624
 
613
- # Vertical position on the page
614
- y_pos = getattr(element, "top", 0.0)
625
+ # Position on the page based on orientation
626
+ if orientation == "vertical":
627
+ pos = getattr(element, "top", 0.0)
628
+ else: # horizontal
629
+ pos = getattr(element, "x0", 0.0)
615
630
 
616
631
  # Ensure starts come before ends at the same coordinate
617
632
  priority = 0 if boundary["type"] == "start" else 1
618
633
 
619
- return (page_idx, y_pos, priority)
634
+ return (page_idx, pos, priority)
620
635
 
621
636
  section_boundaries.sort(key=_sort_key)
622
637
 
@@ -624,10 +639,17 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
624
639
  sections = []
625
640
 
626
641
  # --- Helper: build a FlowRegion spanning multiple pages ---
627
- def _build_flow_region(start_el, end_el):
628
- """Return a FlowRegion that covers from *start_el* to *end_el* (inclusive).
629
- If *end_el* is None, the region continues to the bottom of the last
630
- page in this PageCollection."""
642
+ def _build_flow_region(start_el, end_el, include_boundaries="both", orientation="vertical"):
643
+ """Return a FlowRegion that covers from *start_el* to *end_el*.
644
+ If *end_el* is None, the region continues to the bottom/right of the last
645
+ page in this PageCollection.
646
+
647
+ Args:
648
+ start_el: Start element
649
+ end_el: End element
650
+ include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none'
651
+ orientation: 'vertical' or 'horizontal' - determines section direction
652
+ """
631
653
  # Local imports to avoid top-level cycles
632
654
  from natural_pdf.elements.region import Region
633
655
  from natural_pdf.flows.element import FlowElement
@@ -639,12 +661,24 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
639
661
 
640
662
  parts: list[Region] = []
641
663
 
642
- # Use the actual top of the start element (for implicit starts this is
643
- # the bottom of the previous end element) instead of forcing to 0.
644
- start_top = start_el.top
645
-
646
- # Slice of first page beginning at *start_top*
647
- parts.append(Region(start_pg, (0, start_top, start_pg.width, start_pg.height)))
664
+ if orientation == "vertical":
665
+ # Determine the start_top based on include_boundaries
666
+ start_top = start_el.top
667
+ if include_boundaries == "none" or include_boundaries == "end":
668
+ # Exclude start boundary
669
+ start_top = start_el.bottom if hasattr(start_el, "bottom") else start_el.top
670
+
671
+ # Slice of first page beginning at *start_top*
672
+ parts.append(Region(start_pg, (0, start_top, start_pg.width, start_pg.height)))
673
+ else: # horizontal
674
+ # Determine the start_left based on include_boundaries
675
+ start_left = start_el.x0
676
+ if include_boundaries == "none" or include_boundaries == "end":
677
+ # Exclude start boundary
678
+ start_left = start_el.x1 if hasattr(start_el, "x1") else start_el.x0
679
+
680
+ # Slice of first page beginning at *start_left*
681
+ parts.append(Region(start_pg, (start_left, 0, start_pg.width, start_pg.height)))
648
682
 
649
683
  # Full middle pages
650
684
  for pg_idx in range(start_pg.index + 1, end_pg.index):
@@ -653,10 +687,32 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
653
687
 
654
688
  # Slice of last page (if distinct)
655
689
  if end_pg is not start_pg:
656
- bottom = end_el.bottom if end_el is not None else end_pg.height
657
- parts.append(Region(end_pg, (0, 0, end_pg.width, bottom)))
690
+ if orientation == "vertical":
691
+ # Determine the bottom based on include_boundaries
692
+ if end_el is not None:
693
+ if include_boundaries == "none" or include_boundaries == "start":
694
+ # Exclude end boundary
695
+ bottom = end_el.top if hasattr(end_el, "top") else end_el.bottom
696
+ else:
697
+ # Include end boundary
698
+ bottom = end_el.bottom
699
+ else:
700
+ bottom = end_pg.height
701
+ parts.append(Region(end_pg, (0, 0, end_pg.width, bottom)))
702
+ else: # horizontal
703
+ # Determine the right based on include_boundaries
704
+ if end_el is not None:
705
+ if include_boundaries == "none" or include_boundaries == "start":
706
+ # Exclude end boundary
707
+ right = end_el.x0 if hasattr(end_el, "x0") else end_el.x1
708
+ else:
709
+ # Include end boundary
710
+ right = end_el.x1
711
+ else:
712
+ right = end_pg.width
713
+ parts.append(Region(end_pg, (0, 0, right, end_pg.height)))
658
714
 
659
- flow = Flow(segments=parts, arrangement="vertical")
715
+ flow = Flow(segments=parts, arrangement=orientation)
660
716
  src_fe = FlowElement(physical_object=start_el, flow=flow)
661
717
  return FlowRegion(
662
718
  flow=flow,
@@ -680,26 +736,103 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
680
736
  start_element = current_start["element"]
681
737
  end_element = boundary["element"]
682
738
 
739
+ # Check if this is an implicit start created from this same end element
740
+ # This would create a zero-height section, so skip this pairing
741
+ if (
742
+ hasattr(start_element, "is_implicit_start")
743
+ and hasattr(start_element, "created_from_end")
744
+ and start_element.created_from_end is end_element
745
+ ):
746
+ # Skip this pairing - keep current_start for next end element
747
+ continue
748
+
683
749
  # If both elements are on the same page, use the page's get_section_between
684
750
  if start_element.page == end_element.page:
685
751
  # For implicit start elements, create a region from the top of the page
686
752
  if hasattr(start_element, "is_implicit_start"):
687
753
  from natural_pdf.elements.region import Region
688
754
 
689
- section = Region(
690
- start_element.page,
691
- (0, start_element.top, start_element.page.width, end_element.bottom),
692
- )
755
+ # Adjust boundaries based on include_boundaries parameter and orientation
756
+ if orientation == "vertical":
757
+ top = start_element.top
758
+ bottom = end_element.bottom
759
+
760
+ if include_boundaries == "none":
761
+ # Exclude both boundaries - move past them
762
+ top = (
763
+ start_element.bottom
764
+ if hasattr(start_element, "bottom")
765
+ else start_element.top
766
+ )
767
+ bottom = (
768
+ end_element.top
769
+ if hasattr(end_element, "top")
770
+ else end_element.bottom
771
+ )
772
+ elif include_boundaries == "start":
773
+ # Include start, exclude end
774
+ bottom = (
775
+ end_element.top
776
+ if hasattr(end_element, "top")
777
+ else end_element.bottom
778
+ )
779
+ elif include_boundaries == "end":
780
+ # Exclude start, include end
781
+ top = (
782
+ start_element.bottom
783
+ if hasattr(start_element, "bottom")
784
+ else start_element.top
785
+ )
786
+ # "both" is default - no adjustment needed
787
+
788
+ section = Region(
789
+ start_element.page,
790
+ (0, top, start_element.page.width, bottom),
791
+ )
792
+ else: # horizontal
793
+ left = start_element.x0
794
+ right = end_element.x1
795
+
796
+ if include_boundaries == "none":
797
+ # Exclude both boundaries - move past them
798
+ left = (
799
+ start_element.x1
800
+ if hasattr(start_element, "x1")
801
+ else start_element.x0
802
+ )
803
+ right = (
804
+ end_element.x0 if hasattr(end_element, "x0") else end_element.x1
805
+ )
806
+ elif include_boundaries == "start":
807
+ # Include start, exclude end
808
+ right = (
809
+ end_element.x0 if hasattr(end_element, "x0") else end_element.x1
810
+ )
811
+ elif include_boundaries == "end":
812
+ # Exclude start, include end
813
+ left = (
814
+ start_element.x1
815
+ if hasattr(start_element, "x1")
816
+ else start_element.x0
817
+ )
818
+ # "both" is default - no adjustment needed
819
+
820
+ section = Region(
821
+ start_element.page,
822
+ (left, 0, right, start_element.page.height),
823
+ )
693
824
  section.start_element = start_element
694
825
  section.boundary_element_found = end_element
695
826
  else:
696
827
  section = start_element.page.get_section_between(
697
- start_element, end_element, include_boundaries
828
+ start_element, end_element, include_boundaries, orientation
698
829
  )
699
830
  sections.append(section)
700
831
  else:
701
832
  # Create FlowRegion spanning pages
702
- flow_region = _build_flow_region(start_element, end_element)
833
+ flow_region = _build_flow_region(
834
+ start_element, end_element, include_boundaries, orientation
835
+ )
703
836
  sections.append(flow_region)
704
837
 
705
838
  current_start = None
@@ -709,35 +842,84 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
709
842
  # Create a section from current_start to just before this boundary
710
843
  start_element = current_start["element"]
711
844
 
712
- # Find the last element before this boundary on the same page
845
+ # Create section from current start to just before this new start
713
846
  if start_element.page == boundary["element"].page:
714
- # Find elements on this page
715
- page_elements = [e for e in all_elements if e.page == start_element.page]
716
- # Sort by position
717
- page_elements.sort(key=lambda e: (e.top, e.x0))
718
-
719
- # Find the last element before the boundary
720
- end_idx = (
721
- page_elements.index(boundary["element"]) - 1
722
- if boundary["element"] in page_elements
723
- else -1
724
- )
725
- end_element = page_elements[end_idx] if end_idx >= 0 else None
847
+ from natural_pdf.elements.region import Region
726
848
 
727
- # Create the section
728
- section = start_element.page.get_section_between(
729
- start_element, end_element, include_boundaries
730
- )
731
- sections.append(section)
849
+ next_start = boundary["element"]
850
+
851
+ # Create section based on orientation
852
+ if orientation == "vertical":
853
+ # Determine vertical bounds
854
+ if include_boundaries in ["start", "both"]:
855
+ top = start_element.top
856
+ else:
857
+ top = start_element.bottom
858
+
859
+ # The section ends just before the next start
860
+ bottom = next_start.top
861
+
862
+ # Create the section with full page width
863
+ if top < bottom:
864
+ section = Region(
865
+ start_element.page, (0, top, start_element.page.width, bottom)
866
+ )
867
+ section.start_element = start_element
868
+ sections.append(section)
869
+ else: # horizontal
870
+ # Determine horizontal bounds
871
+ if include_boundaries in ["start", "both"]:
872
+ left = start_element.x0
873
+ else:
874
+ left = start_element.x1
875
+
876
+ # The section ends just before the next start
877
+ right = next_start.x0
878
+
879
+ # Create the section with full page height
880
+ if left < right:
881
+ section = Region(
882
+ start_element.page, (left, 0, right, start_element.page.height)
883
+ )
884
+ section.start_element = start_element
885
+ sections.append(section)
732
886
  else:
733
887
  # Cross-page section - create from current_start to the end of its page
734
888
  from natural_pdf.elements.region import Region
735
889
 
736
890
  start_page = start_element.page
737
891
 
738
- # Handle implicit start elements
739
- start_top = start_element.top
740
- region = Region(start_page, (0, start_top, start_page.width, start_page.height))
892
+ # Handle implicit start elements and respect include_boundaries
893
+ if orientation == "vertical":
894
+ if include_boundaries in ["none", "end"]:
895
+ # Exclude start boundary
896
+ start_top = (
897
+ start_element.bottom
898
+ if hasattr(start_element, "bottom")
899
+ else start_element.top
900
+ )
901
+ else:
902
+ # Include start boundary
903
+ start_top = start_element.top
904
+
905
+ region = Region(
906
+ start_page, (0, start_top, start_page.width, start_page.height)
907
+ )
908
+ else: # horizontal
909
+ if include_boundaries in ["none", "end"]:
910
+ # Exclude start boundary
911
+ start_left = (
912
+ start_element.x1
913
+ if hasattr(start_element, "x1")
914
+ else start_element.x0
915
+ )
916
+ else:
917
+ # Include start boundary
918
+ start_left = start_element.x0
919
+
920
+ region = Region(
921
+ start_page, (start_left, 0, start_page.width, start_page.height)
922
+ )
741
923
  region.start_element = start_element
742
924
  sections.append(region)
743
925
 
@@ -753,19 +935,48 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
753
935
  # on the last page of the collection
754
936
  last_page = self.pages[-1]
755
937
  last_page_elements = [e for e in all_elements if e.page == last_page]
756
- last_page_elements.sort(key=lambda e: (e.top, e.x0))
938
+ if orientation == "vertical":
939
+ last_page_elements.sort(key=lambda e: (e.top, e.x0))
940
+ else: # horizontal
941
+ last_page_elements.sort(key=lambda e: (e.x0, e.top))
757
942
  end_element = last_page_elements[-1] if last_page_elements else None
758
943
 
759
944
  # Create FlowRegion spanning multiple pages using helper
760
- flow_region = _build_flow_region(start_element, end_element)
945
+ flow_region = _build_flow_region(
946
+ start_element, end_element, include_boundaries, orientation
947
+ )
761
948
  sections.append(flow_region)
762
949
  else:
763
950
  # With start_elements only, create a section to the end of the current page
764
951
  from natural_pdf.elements.region import Region
765
952
 
766
- # Handle implicit start elements
767
- start_top = start_element.top
768
- region = Region(start_page, (0, start_top, start_page.width, start_page.height))
953
+ # Handle implicit start elements and respect include_boundaries
954
+ if orientation == "vertical":
955
+ if include_boundaries in ["none", "end"]:
956
+ # Exclude start boundary
957
+ start_top = (
958
+ start_element.bottom
959
+ if hasattr(start_element, "bottom")
960
+ else start_element.top
961
+ )
962
+ else:
963
+ # Include start boundary
964
+ start_top = start_element.top
965
+
966
+ region = Region(start_page, (0, start_top, start_page.width, start_page.height))
967
+ else: # horizontal
968
+ if include_boundaries in ["none", "end"]:
969
+ # Exclude start boundary
970
+ start_left = (
971
+ start_element.x1 if hasattr(start_element, "x1") else start_element.x0
972
+ )
973
+ else:
974
+ # Include start boundary
975
+ start_left = start_element.x0
976
+
977
+ region = Region(
978
+ start_page, (start_left, 0, start_page.width, start_page.height)
979
+ )
769
980
  region.start_element = start_element
770
981
  sections.append(region)
771
982
 
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional,
7
7
 
8
8
  from tqdm.auto import tqdm
9
9
 
10
+ from natural_pdf.utils.color_utils import format_color_value
11
+
10
12
  if TYPE_CHECKING:
11
13
  from natural_pdf.core.page import Page
12
14
  from natural_pdf.core.page_collection import PageCollection
@@ -201,7 +203,15 @@ class PageGroupBy:
201
203
  """
202
204
  groups = self._compute_groups()
203
205
  for key, pages in groups.items():
204
- print(f"\n--- Group: {key} ({len(pages)} pages) ---")
206
+ # Format the key for display, converting colors to hex if needed
207
+ if isinstance(self.by, str):
208
+ # If grouped by a string selector, check if it's a color attribute
209
+ formatted_key = format_color_value(key, attr_name=self.by)
210
+ else:
211
+ # For callable grouping, try to format as color
212
+ formatted_key = format_color_value(key)
213
+
214
+ print(f"\n--- Group: {formatted_key} ({len(pages)} pages) ---")
205
215
  pages.show(**kwargs)
206
216
 
207
217
  def __len__(self) -> int:
@@ -220,7 +230,15 @@ class PageGroupBy:
220
230
  print("-" * 40)
221
231
 
222
232
  for i, (key, pages) in enumerate(groups.items()):
223
- key_display = f"'{key}'" if key is not None else "None"
233
+ if key is None:
234
+ key_display = "None"
235
+ else:
236
+ # Format the key for display, converting colors to hex if needed
237
+ if isinstance(self.by, str):
238
+ formatted_key = format_color_value(key, attr_name=self.by)
239
+ else:
240
+ formatted_key = format_color_value(key)
241
+ key_display = f"'{formatted_key}'"
224
242
  print(f"[{i}] {key_display}: {len(pages)} pages")
225
243
 
226
244
  def __repr__(self) -> str:
natural_pdf/core/pdf.py CHANGED
@@ -252,6 +252,16 @@ class _LazyPageList(Sequence):
252
252
  logger.warning(f"Failed to apply region to page {cached.number}: {e}")
253
253
 
254
254
  self._cache[index] = cached
255
+
256
+ # Also cache in the parent PDF's main page list if this is a slice
257
+ if (
258
+ hasattr(self._parent_pdf, "_pages")
259
+ and hasattr(self._parent_pdf._pages, "_cache")
260
+ and actual_page_index < len(self._parent_pdf._pages._cache)
261
+ and self._parent_pdf._pages._cache[actual_page_index] is None
262
+ ):
263
+ self._parent_pdf._pages._cache[actual_page_index] = cached
264
+
255
265
  return cached
256
266
 
257
267
  # Sequence protocol ---------------------------------------------------
@@ -720,26 +730,16 @@ class PDF(
720
730
  # Store for bookkeeping and lazy application
721
731
  self._exclusions.append((exclusion_func, label))
722
732
 
723
- # Apply only to already-created (cached) pages to avoid forcing page creation
724
- for i in range(len(self._pages)):
725
- if self._pages._cache[i] is not None: # Only apply to existing pages
726
- try:
727
- self._pages._cache[i].add_exclusion(exclusion_func, label=label)
728
- except Exception as e:
729
- logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
733
+ # Don't modify already-cached pages - they will get PDF-level exclusions
734
+ # dynamically through _get_exclusion_regions()
730
735
  return self
731
736
 
732
737
  # Fallback to original callable / Region behaviour ------------------
733
738
  exclusion_data = (exclusion_func, label)
734
739
  self._exclusions.append(exclusion_data)
735
740
 
736
- # Apply only to already-created (cached) pages to avoid forcing page creation
737
- for i in range(len(self._pages)):
738
- if self._pages._cache[i] is not None: # Only apply to existing pages
739
- try:
740
- self._pages._cache[i].add_exclusion(exclusion_func, label=label)
741
- except Exception as e:
742
- logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
741
+ # Don't modify already-cached pages - they will get PDF-level exclusions
742
+ # dynamically through _get_exclusion_regions()
743
743
 
744
744
  return self
745
745
 
@@ -1280,6 +1280,7 @@ class PDF(
1280
1280
  end_elements=None,
1281
1281
  new_section_on_page_break=False,
1282
1282
  include_boundaries="both",
1283
+ orientation="vertical",
1283
1284
  ) -> "ElementCollection":
1284
1285
  """
1285
1286
  Extract sections from the entire PDF based on start/end elements.
@@ -1292,6 +1293,7 @@ class PDF(
1292
1293
  end_elements: Elements or selector string that mark the end of sections (optional)
1293
1294
  new_section_on_page_break: Whether to start a new section at page boundaries (default: False)
1294
1295
  include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none' (default: 'both')
1296
+ orientation: 'vertical' (default) or 'horizontal' - determines section direction
1295
1297
 
1296
1298
  Returns:
1297
1299
  ElementCollection of Region objects representing the extracted sections
@@ -1328,6 +1330,7 @@ class PDF(
1328
1330
  end_elements=end_elements,
1329
1331
  new_section_on_page_break=new_section_on_page_break,
1330
1332
  include_boundaries=include_boundaries,
1333
+ orientation=orientation,
1331
1334
  )
1332
1335
 
1333
1336
  def save_searchable(self, output_path: Union[str, "Path"], dpi: int = 300, **kwargs):
@@ -196,7 +196,7 @@ class Visualizable:
196
196
  columns: Optional[int] = 6, # For grid layout, defaults to 6 columns
197
197
  limit: Optional[int] = 30, # Max pages to show (default 30)
198
198
  # Cropping options
199
- crop: Union[bool, Literal["content"]] = False,
199
+ crop: Union[bool, int, str, "Region", Literal["wide"]] = False,
200
200
  crop_bbox: Optional[Tuple[float, float, float, float]] = None,
201
201
  **kwargs,
202
202
  ) -> Optional["PIL_Image"]:
@@ -219,7 +219,12 @@ class Visualizable:
219
219
  gap: Pixels between stacked images
220
220
  columns: Number of columns for grid layout (defaults to 6)
221
221
  limit: Maximum number of pages to display (default 30, None for all)
222
- crop: Whether to crop (True, False, or 'content' for bbox of elements)
222
+ crop: Cropping mode:
223
+ - False: No cropping (default)
224
+ - True: Tight crop to element bounds
225
+ - int: Padding in pixels around element
226
+ - 'wide': Full page width, cropped vertically to element
227
+ - Region: Crop to the bounds of another region
223
228
  crop_bbox: Explicit crop bounds
224
229
  **kwargs: Additional parameters passed to rendering
225
230
 
@@ -230,6 +235,11 @@ class Visualizable:
230
235
  if isinstance(annotate, str):
231
236
  annotate = [annotate]
232
237
 
238
+ # Handle 'cols' as an alias for 'columns' for backward compatibility
239
+ if "cols" in kwargs and columns == 6: # Only use cols if columns wasn't explicitly set
240
+ columns = kwargs.pop("cols")
241
+ logger.info(f"Using 'cols' parameter as alias for 'columns': {columns}")
242
+
233
243
  # Pass limit as max_pages to _get_render_specs
234
244
  if limit is not None:
235
245
  kwargs["max_pages"] = limit
@@ -283,7 +293,7 @@ class Visualizable:
283
293
  gap: int = 5,
284
294
  columns: Optional[int] = None,
285
295
  # Cropping options
286
- crop: Union[bool, Literal["content"]] = False,
296
+ crop: Union[bool, int, str, "Region", Literal["wide"]] = False,
287
297
  crop_bbox: Optional[Tuple[float, float, float, float]] = None,
288
298
  **kwargs,
289
299
  ) -> Optional["PIL_Image"]:
@@ -299,13 +309,18 @@ class Visualizable:
299
309
  stack_direction: Direction for stack layout
300
310
  gap: Pixels between stacked images
301
311
  columns: Number of columns for grid layout
302
- crop: Whether to crop
312
+ crop: Cropping mode (False, True, int for padding, 'wide', or Region)
303
313
  crop_bbox: Explicit crop bounds
304
314
  **kwargs: Additional parameters passed to rendering
305
315
 
306
316
  Returns:
307
317
  PIL Image object or None if nothing to render
308
318
  """
319
+ # Handle 'cols' as an alias for 'columns' for backward compatibility
320
+ if "cols" in kwargs and columns is None: # Only use cols if columns wasn't explicitly set
321
+ columns = kwargs.pop("cols")
322
+ logger.info(f"Using 'cols' parameter as alias for 'columns': {columns}")
323
+
309
324
  specs = self._get_render_specs(mode="render", crop=crop, crop_bbox=crop_bbox, **kwargs)
310
325
 
311
326
  if not specs:
@@ -353,7 +368,7 @@ class Visualizable:
353
368
  stack_direction: Direction for stack layout
354
369
  gap: Pixels between stacked images
355
370
  columns: Number of columns for grid layout
356
- crop: Whether to crop
371
+ crop: Cropping mode (False, True, int for padding, 'wide', or Region)
357
372
  crop_bbox: Explicit crop bounds
358
373
  format: Image format (inferred from path if not specified)
359
374
  **kwargs: Additional parameters passed to rendering
@@ -344,7 +344,7 @@ def _extract_element_value(element: "Element", column: str) -> Any:
344
344
 
345
345
  elif column == "highlight":
346
346
  # If element is highlighted, return its colour; otherwise blank
347
- if getattr(element, "highlight", False):
347
+ if getattr(element, "is_highlighted", False):
348
348
  col_val = getattr(element, "highlight_color", None)
349
349
  if col_val is None:
350
350
  return "True" # fallback if colour missing
@@ -306,7 +306,7 @@ def _analyze_typography(elements: List["Element"]) -> Dict[str, Any]:
306
306
  styles["strikeout"] += 1
307
307
  if getattr(element, "underline", False):
308
308
  styles["underline"] += 1
309
- if getattr(element, "highlight", False):
309
+ if getattr(element, "is_highlighted", False):
310
310
  styles["highlight"] += 1
311
311
 
312
312
  # Color - use TextElement's color property