natural-pdf 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. natural_pdf/__init__.py +7 -2
  2. natural_pdf/analyzers/shape_detection_mixin.py +1092 -0
  3. natural_pdf/analyzers/text_options.py +9 -1
  4. natural_pdf/analyzers/text_structure.py +371 -58
  5. natural_pdf/classification/manager.py +3 -4
  6. natural_pdf/collections/pdf_collection.py +19 -39
  7. natural_pdf/core/element_manager.py +11 -1
  8. natural_pdf/core/highlighting_service.py +146 -75
  9. natural_pdf/core/page.py +287 -188
  10. natural_pdf/core/pdf.py +57 -42
  11. natural_pdf/elements/base.py +51 -0
  12. natural_pdf/elements/collections.py +362 -67
  13. natural_pdf/elements/line.py +5 -0
  14. natural_pdf/elements/region.py +396 -23
  15. natural_pdf/exporters/data/__init__.py +0 -0
  16. natural_pdf/exporters/data/pdf.ttf +0 -0
  17. natural_pdf/exporters/data/sRGB.icc +0 -0
  18. natural_pdf/exporters/hocr.py +40 -61
  19. natural_pdf/exporters/hocr_font.py +7 -13
  20. natural_pdf/exporters/original_pdf.py +10 -13
  21. natural_pdf/exporters/paddleocr.py +51 -11
  22. natural_pdf/exporters/searchable_pdf.py +0 -10
  23. natural_pdf/flows/__init__.py +12 -0
  24. natural_pdf/flows/collections.py +533 -0
  25. natural_pdf/flows/element.py +382 -0
  26. natural_pdf/flows/flow.py +216 -0
  27. natural_pdf/flows/region.py +458 -0
  28. natural_pdf/search/__init__.py +65 -52
  29. natural_pdf/search/lancedb_search_service.py +325 -0
  30. natural_pdf/search/numpy_search_service.py +255 -0
  31. natural_pdf/search/searchable_mixin.py +25 -71
  32. natural_pdf/selectors/parser.py +163 -8
  33. natural_pdf/widgets/viewer.py +22 -31
  34. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/METADATA +55 -49
  35. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/RECORD +38 -30
  36. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/WHEEL +1 -1
  37. natural_pdf/search/haystack_search_service.py +0 -687
  38. natural_pdf/search/haystack_utils.py +0 -474
  39. natural_pdf/utils/tqdm_utils.py +0 -51
  40. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/licenses/LICENSE +0 -0
  41. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/top_level.txt +0 -0
@@ -25,14 +25,12 @@ from typing import (
25
25
  )
26
26
 
27
27
  from PIL import Image
28
- from tqdm import tqdm
29
- from tqdm.auto import tqdm as auto_tqdm
30
- from tqdm.notebook import tqdm as notebook_tqdm
28
+ from tqdm.auto import tqdm
31
29
 
32
- from natural_pdf.utils.tqdm_utils import get_tqdm
30
+ from natural_pdf.exporters.base import FinetuneExporter
33
31
 
34
- # Get the appropriate tqdm class once
35
- tqdm = get_tqdm()
32
+ # Need to import this utility
33
+ from natural_pdf.utils.identifiers import generate_short_path_hash
36
34
 
37
35
  # Set up logger early
38
36
  # Configure logging to include thread information
@@ -67,8 +65,10 @@ except ImportError as e:
67
65
  from natural_pdf.collections.mixins import ApplyMixin
68
66
  from natural_pdf.search.searchable_mixin import SearchableMixin # Import the new mixin
69
67
 
68
+ from natural_pdf.analyzers.shape_detection_mixin import ShapeDetectionMixin
70
69
 
71
- class PDFCollection(SearchableMixin, ApplyMixin, ExportMixin): # Add ExportMixin
70
+
71
+ class PDFCollection(SearchableMixin, ApplyMixin, ExportMixin, ShapeDetectionMixin): # Add ExportMixin and ShapeDetectionMixin
72
72
  def __init__(
73
73
  self,
74
74
  source: Union[str, Iterable[Union[str, "PDF"]]],
@@ -119,16 +119,8 @@ class PDFCollection(SearchableMixin, ApplyMixin, ExportMixin): # Add ExportMixi
119
119
  @staticmethod
120
120
  def _get_pdf_class():
121
121
  """Helper method to dynamically import the PDF class."""
122
- try:
123
- # Import needs to resolve path correctly
124
- from natural_pdf.core.pdf import PDF
125
-
126
- return PDF
127
- except ImportError as e:
128
- logger.error(
129
- "Could not import PDF class from natural_pdf.core.pdf. Ensure it exists and there are no circular imports at runtime."
130
- )
131
- raise ImportError("PDF class is required but could not be imported.") from e
122
+ from natural_pdf.core.pdf import PDF
123
+ return PDF
132
124
 
133
125
  # --- Internal Helpers ---
134
126
 
@@ -141,16 +133,13 @@ class PDFCollection(SearchableMixin, ApplyMixin, ExportMixin): # Add ExportMixi
141
133
  def _execute_glob(self, pattern: str) -> Set[str]:
142
134
  """Glob for paths and return a set of valid PDF paths."""
143
135
  found_paths = set()
144
- try:
145
- # Use iglob for potentially large directories/matches
146
- paths_iter = py_glob.iglob(pattern, recursive=self._recursive)
147
- for path_str in paths_iter:
148
- # Use Path object for easier checking
149
- p = Path(path_str)
150
- if p.is_file() and p.suffix.lower() == ".pdf":
151
- found_paths.add(str(p.resolve())) # Store resolved absolute path
152
- except Exception as e:
153
- logger.error(f"Error processing glob pattern '{pattern}': {e}")
136
+ # Use iglob for potentially large directories/matches
137
+ paths_iter = py_glob.iglob(pattern, recursive=self._recursive)
138
+ for path_str in paths_iter:
139
+ # Use Path object for easier checking
140
+ p = Path(path_str)
141
+ if p.is_file() and p.suffix.lower() == ".pdf":
142
+ found_paths.add(str(p.resolve())) # Store resolved absolute path
154
143
  return found_paths
155
144
 
156
145
  def _resolve_sources_to_paths(self, source: Union[str, Iterable[str]]) -> List[str]:
@@ -534,19 +523,10 @@ class PDFCollection(SearchableMixin, ApplyMixin, ExportMixin): # Add ExportMixi
534
523
  **kwargs: Additional arguments passed to create_correction_task_package
535
524
  (e.g., image_render_scale, overwrite).
536
525
  """
537
- try:
538
- from natural_pdf.utils.packaging import create_correction_task_package
526
+ from natural_pdf.utils.packaging import create_correction_task_package
539
527
 
540
- # Pass the collection itself (self) as the source
541
- create_correction_task_package(source=self, output_zip_path=output_zip_path, **kwargs)
542
- except ImportError:
543
- logger.error(
544
- "Failed to import 'create_correction_task_package'. Packaging utility might be missing."
545
- )
546
- # Or raise
547
- except Exception as e:
548
- logger.error(f"Failed to export correction task for collection: {e}", exc_info=True)
549
- raise # Re-raise the exception from the utility function
528
+ # Pass the collection itself (self) as the source
529
+ create_correction_task_package(source=self, output_zip_path=output_zip_path, **kwargs)
550
530
 
551
531
  # --- Mixin Required Implementation ---
552
532
  def get_indexable_items(self) -> Iterable[Indexable]:
@@ -407,7 +407,17 @@ class ElementManager:
407
407
  char_dict_data = ocr_char_dict # Use the one we already created
408
408
  char_dict_data["object_type"] = "char" # Mark as char type
409
409
  char_dict_data.setdefault("adv", char_dict_data.get("width", 0))
410
- self._elements["chars"].append(char_dict_data) # Append the dictionary
410
+
411
+ # Create a TextElement for the char representation
412
+ # Ensure _char_dicts is handled correctly by TextElement constructor
413
+ # For an OCR word represented as a char, its _char_dicts can be a list containing its own data
414
+ char_element_specific_data = char_dict_data.copy()
415
+ char_element_specific_data["_char_dicts"] = [char_dict_data.copy()]
416
+
417
+ ocr_char_as_element = TextElement(char_element_specific_data, self._page)
418
+ self._elements["chars"].append(
419
+ ocr_char_as_element
420
+ ) # Append TextElement instance
411
421
 
412
422
  except (KeyError, ValueError, TypeError) as e:
413
423
  logger.error(f"Failed to process OCR result: {result}. Error: {e}", exc_info=True)
@@ -215,21 +215,14 @@ class HighlightRenderer:
215
215
  def _render_ocr_text(self):
216
216
  """Renders OCR text onto the image. (Adapted from old HighlightManager)"""
217
217
  # Use the page reference to get OCR elements
218
- try:
219
- # Try finding first, then extracting if necessary
220
- ocr_elements = self.page.find_all("text[source=ocr]")
221
- if not ocr_elements:
222
- # Don't run full OCR here, just extract if already run
223
- ocr_elements = [
224
- el for el in self.page.words if getattr(el, "source", None) == "ocr"
225
- ]
226
- # Alternative: self.page.extract_ocr_elements() - but might be slow
227
-
228
- except Exception as e:
229
- logger.warning(
230
- f"Could not get OCR elements for page {self.page.number}: {e}", exc_info=True
231
- )
232
- return # Don't modify image if OCR elements aren't available
218
+ # Try finding first, then extracting if necessary
219
+ ocr_elements = self.page.find_all("text[source=ocr]")
220
+ if not ocr_elements:
221
+ # Don't run full OCR here, just extract if already run
222
+ ocr_elements = [
223
+ el for el in self.page.words if getattr(el, "source", None) == "ocr"
224
+ ]
225
+ # Alternative: self.page.extract_ocr_elements() - but might be slow
233
226
 
234
227
  if not ocr_elements:
235
228
  logger.debug(f"No OCR elements found for page {self.page.number} to render.")
@@ -293,20 +286,15 @@ class HighlightRenderer:
293
286
  )
294
287
 
295
288
  # Calculate text position (centered vertically, slightly offset from left)
296
- try:
297
- if hasattr(sized_font, "getbbox"): # Modern PIL
298
- _, text_top_offset, _, text_bottom_offset = sized_font.getbbox(element.text)
299
- text_h = text_bottom_offset - text_top_offset
300
- else: # Older PIL approximation
301
- text_h = font_size
302
- text_y = top_s + (box_h - text_h) / 2
303
- # Adjust for vertical offset in some fonts
304
- text_y -= text_top_offset if hasattr(sized_font, "getbbox") else 0
305
- text_x = x0_s + padding # Start near left edge with padding
306
-
307
- except Exception:
308
- # Fallback positioning
309
- text_x, text_y = x0_s + padding, top_s + padding
289
+ if hasattr(sized_font, "getbbox"): # Modern PIL
290
+ _, text_top_offset, _, text_bottom_offset = sized_font.getbbox(element.text)
291
+ text_h = text_bottom_offset - text_top_offset
292
+ else: # Older PIL approximation
293
+ text_h = font_size
294
+ text_y = top_s + (box_h - text_h) / 2
295
+ # Adjust for vertical offset in some fonts
296
+ text_y -= text_top_offset if hasattr(sized_font, "getbbox") else 0
297
+ text_x = x0_s + padding # Start near left edge with padding
310
298
 
311
299
  draw.text((text_x, text_y), element.text, fill=(0, 0, 0, 255), font=sized_font)
312
300
 
@@ -392,9 +380,6 @@ class HighlightingService:
392
380
  except ValueError:
393
381
  logger.warning(f"Invalid color string: '{color_input}'")
394
382
  return None
395
- except Exception as e:
396
- logger.error(f"Error processing color string '{color_input}': {e}")
397
- return None
398
383
  else:
399
384
  logger.warning(f"Invalid color input type: {type(color_input)}")
400
385
  return None
@@ -611,13 +596,13 @@ class HighlightingService:
611
596
 
612
597
  Args:
613
598
  page_index: The 0-based index of the page to render.
614
- scale: Scale factor for rendering highlights.
599
+ scale: Scale factor for rendering highlights if width/height/resolution not in kwargs.
615
600
  labels: Whether to include a legend for highlights.
616
601
  legend_position: Position of the legend.
617
602
  render_ocr: Whether to render OCR text on the image.
618
- resolution: Optional resolution (DPI) for the base page image.
619
- Defaults to scale * 72.
620
- kwargs: Additional keyword arguments for pdfplumber's page.to_image.
603
+ resolution: Optional resolution (DPI) for the base page image if width/height not in kwargs.
604
+ Defaults to scale * 72 if not otherwise specified.
605
+ kwargs: Additional keyword arguments for pdfplumber's page.to_image (e.g., width, height).
621
606
 
622
607
  Returns:
623
608
  A PIL Image object of the rendered page, or None if rendering fails.
@@ -626,34 +611,84 @@ class HighlightingService:
626
611
  logger.error(f"Invalid page index {page_index} for rendering.")
627
612
  return None
628
613
 
629
- page = self._pdf[page_index]
614
+ page_obj = self._pdf[page_index] # Renamed to avoid conflict
630
615
  highlights_on_page = self.get_highlights_for_page(page_index)
631
616
 
632
- render_resolution = resolution if resolution is not None else scale * 72
633
- base_image = render_plain_page(page, render_resolution)
634
- base_image = base_image.convert("RGBA")
635
- logger.debug(
636
- f"Base image for page {page_index} rendered with resolution {render_resolution}."
637
- )
617
+ to_image_args = kwargs.copy()
618
+ actual_scale_x = None
619
+ actual_scale_y = None
620
+
621
+ if "width" in to_image_args and to_image_args["width"] is not None:
622
+ logger.debug(f"Rendering page {page_index} with width={to_image_args['width']}.")
623
+ if "height" in to_image_args: to_image_args.pop("height", None)
624
+ # Actual scale will be calculated after image creation
625
+ elif "height" in to_image_args and to_image_args["height"] is not None:
626
+ logger.debug(f"Rendering page {page_index} with height={to_image_args['height']}.")
627
+ # Actual scale will be calculated after image creation
628
+ else:
629
+ # Use explicit resolution from kwargs if present, then the resolution param, then scale
630
+ render_resolution = to_image_args.pop("resolution", resolution) # Use and remove from kwargs if present
631
+ if render_resolution is None:
632
+ render_resolution = scale * 72
633
+ to_image_args["resolution"] = render_resolution # Add it back for the call
634
+ actual_scale_x = render_resolution / 72.0
635
+ actual_scale_y = render_resolution / 72.0
636
+ logger.debug(f"Rendering page {page_index} with resolution {render_resolution} (scale: {actual_scale_x:.2f}).")
637
+
638
+ try:
639
+ # base_image = render_plain_page(page_obj, actual_scale_x * 72 if actual_scale_x else scale * 72) # Old call
640
+ img_object = page_obj._page.to_image(**to_image_args)
641
+ base_image_pil = (
642
+ img_object.annotated
643
+ if hasattr(img_object, "annotated")
644
+ else img_object._repr_png_()
645
+ )
646
+ if isinstance(base_image_pil, bytes):
647
+ from io import BytesIO
648
+ base_image_pil = Image.open(BytesIO(base_image_pil))
649
+ base_image_pil = base_image_pil.convert("RGBA") # Ensure RGBA for renderer
650
+ logger.debug(
651
+ f"Base image for page {page_index} rendered. Size: {base_image_pil.size}."
652
+ )
653
+
654
+ if actual_scale_x is None or actual_scale_y is None: # If not set by resolution path
655
+ if page_obj.width > 0:
656
+ actual_scale_x = base_image_pil.width / page_obj.width
657
+ else:
658
+ actual_scale_x = scale # Fallback
659
+ if page_obj.height > 0:
660
+ actual_scale_y = base_image_pil.height / page_obj.height
661
+ else:
662
+ actual_scale_y = scale # Fallback
663
+ logger.debug(f"Calculated actual scales for page {page_index}: x={actual_scale_x:.2f}, y={actual_scale_y:.2f}")
664
+
665
+ except IOError as e:
666
+ logger.error(f"IOError creating base image for page {page_index}: {e}")
667
+ raise
668
+ except AttributeError as e:
669
+ logger.error(f"AttributeError creating base image for page {page_index}: {e}")
670
+ raise
671
+
672
+ renderer_scale = actual_scale_x # Assuming aspect ratio maintained, use x_scale
638
673
 
639
674
  # --- Render Highlights ---
640
675
  rendered_image: Image.Image
641
676
  if highlights_on_page:
642
677
  renderer = HighlightRenderer(
643
- page=page,
644
- base_image=base_image,
678
+ page=page_obj,
679
+ base_image=base_image_pil,
645
680
  highlights=highlights_on_page,
646
- scale=scale,
681
+ scale=renderer_scale, # Use the determined actual scale
647
682
  render_ocr=render_ocr,
648
683
  )
649
684
  rendered_image = renderer.render()
650
685
  else:
651
686
  if render_ocr:
652
- # Still render OCR even if no highlights
653
- renderer = HighlightRenderer(page, base_image, [], scale, True)
687
+ # Still render OCR even if no highlights, using the determined actual scale
688
+ renderer = HighlightRenderer(page_obj, base_image_pil, [], renderer_scale, True)
654
689
  rendered_image = renderer.render()
655
690
  else:
656
- rendered_image = base_image # No highlights, no OCR requested
691
+ rendered_image = base_image_pil # No highlights, no OCR requested
657
692
 
658
693
  # --- Add Legend (Based ONLY on this page's highlights) ---
659
694
  if labels:
@@ -697,12 +732,12 @@ class HighlightingService:
697
732
  Args:
698
733
  page_index: Index of the page to render.
699
734
  temporary_highlights: List of highlight data dicts (from ElementCollection._prepare).
700
- scale: Scale factor for rendering.
735
+ scale: Original scale factor for rendering, used if width/height are not provided.
701
736
  labels: Whether to include a legend.
702
737
  legend_position: Position of the legend.
703
738
  render_ocr: Whether to render OCR text.
704
- resolution: Resolution for base page image rendering.
705
- **kwargs: Additional args for pdfplumber's to_image.
739
+ resolution: Resolution for base page image rendering if width/height not used.
740
+ **kwargs: Additional args for pdfplumber's to_image (e.g., width, height).
706
741
 
707
742
  Returns:
708
743
  PIL Image of the preview, or None if rendering fails.
@@ -711,35 +746,64 @@ class HighlightingService:
711
746
  logger.error(f"Invalid page index {page_index} for render_preview.")
712
747
  return None
713
748
 
714
- page = self._pdf.pages[page_index]
715
- render_resolution = resolution if resolution is not None else scale * 72
749
+ page_obj = self._pdf.pages[page_index]
750
+
751
+ to_image_args = kwargs.copy()
752
+ actual_scale_x = None
753
+ actual_scale_y = None
754
+
755
+ # Determine arguments for page._page.to_image()
756
+ if "width" in to_image_args and to_image_args["width"] is not None:
757
+ logger.debug(f"Rendering preview for page {page_index} with width={to_image_args['width']}.")
758
+ # Resolution is implicitly handled by pdfplumber when width is set
759
+ if "height" in to_image_args:
760
+ to_image_args.pop("height", None)
761
+ # after image is created, we will calculate actual_scale_x and actual_scale_y
762
+
763
+ elif "height" in to_image_args and to_image_args["height"] is not None:
764
+ logger.debug(f"Rendering preview for page {page_index} with height={to_image_args['height']}.")
765
+ # Resolution is implicitly handled by pdfplumber when height is set
766
+ # after image is created, we will calculate actual_scale_x and actual_scale_y
767
+ else:
768
+ # Neither width nor height is provided, use resolution or scale.
769
+ render_resolution = resolution if resolution is not None else scale * 72
770
+ to_image_args["resolution"] = render_resolution
771
+ actual_scale_x = render_resolution / 72.0
772
+ actual_scale_y = render_resolution / 72.0
773
+ logger.debug(f"Rendering preview for page {page_index} with resolution={render_resolution} (scale: {actual_scale_x:.2f}).")
716
774
 
717
775
  try:
718
- # Get base image from pdfplumber using the Page object's underlying _page
719
- img_object = page._page.to_image(resolution=render_resolution, **kwargs)
720
- base_image = (
776
+ img_object = page_obj._page.to_image(**to_image_args)
777
+ base_image_pil = (
721
778
  img_object.annotated
722
779
  if hasattr(img_object, "annotated")
723
780
  else img_object._repr_png_()
724
781
  )
725
- if isinstance(base_image, bytes):
782
+ if isinstance(base_image_pil, bytes):
726
783
  from io import BytesIO
784
+ base_image_pil = Image.open(BytesIO(base_image_pil))
785
+ base_image_pil = base_image_pil.convert("RGB")
727
786
 
728
- base_image = Image.open(BytesIO(base_image))
729
- base_image = base_image.convert("RGB") # Ensure consistent format
787
+ # If scale was not determined by resolution, calculate it now from base_image_pil dimensions
788
+ if actual_scale_x is None or actual_scale_y is None:
789
+ if page_obj.width > 0:
790
+ actual_scale_x = base_image_pil.width / page_obj.width
791
+ else:
792
+ actual_scale_x = scale # Fallback to original scale
793
+ if page_obj.height > 0:
794
+ actual_scale_y = base_image_pil.height / page_obj.height
795
+ else:
796
+ actual_scale_y = scale # Fallback to original scale
797
+ logger.debug(f"Calculated actual scales for page {page_index}: x={actual_scale_x:.2f}, y={actual_scale_y:.2f} from image size {base_image_pil.size} and page size ({page_obj.width}, {page_obj.height})")
730
798
 
731
799
  # Convert temporary highlight dicts to Highlight objects
732
- # Note: Colors/labels should be determined *here* for temporary preview
733
800
  preview_highlights = []
734
801
  for hl_data in temporary_highlights:
735
- # Determine the final color using the service logic
736
802
  final_color = self._determine_highlight_color(
737
803
  color_input=hl_data.get("color"),
738
804
  label=hl_data.get("label"),
739
805
  use_color_cycling=hl_data.get("use_color_cycling", False),
740
806
  )
741
-
742
- # Extract potential attributes to draw
743
807
  attrs_to_draw = {}
744
808
  element = hl_data.get("element")
745
809
  include_attrs = hl_data.get("include_attrs")
@@ -753,25 +817,29 @@ class HighlightingService:
753
817
  logger.warning(
754
818
  f"Attribute '{attr_name}' not found on element {element}"
755
819
  )
756
-
757
- # Add highlight if geometry exists
758
820
  if hl_data.get("bbox") or hl_data.get("polygon"):
759
821
  preview_highlights.append(
760
822
  Highlight(
761
823
  page_index=hl_data["page_index"],
762
824
  bbox=hl_data.get("bbox"),
763
825
  polygon=hl_data.get("polygon"),
764
- color=final_color, # Use the determined color
826
+ color=final_color,
765
827
  label=hl_data.get("label"),
766
828
  attributes=attrs_to_draw,
767
829
  )
768
830
  )
769
-
770
- # Render only these highlights
771
- renderer = HighlightRenderer(page, base_image, preview_highlights, scale, render_ocr)
831
+
832
+ # Use the calculated actual_scale_x for the HighlightRenderer
833
+ # Assuming HighlightRenderer can handle a single scale or we adapt it.
834
+ # For now, pdfplumber usually maintains aspect ratio, so one scale should be okay.
835
+ # If not, HighlightRenderer needs to accept scale_x and scale_y.
836
+ # We will use actual_scale_x assuming aspect ratio is maintained by pdfplumber,
837
+ # or if not, it's a reasonable approximation for highlight scaling.
838
+ renderer_scale = actual_scale_x
839
+
840
+ renderer = HighlightRenderer(page_obj, base_image_pil, preview_highlights, renderer_scale, render_ocr)
772
841
  rendered_image = renderer.render()
773
842
 
774
- # Create legend only from temporary highlights
775
843
  legend = None
776
844
  if labels:
777
845
  preview_labels = {h.label: h.color for h in preview_highlights if h.label}
@@ -781,12 +849,15 @@ class HighlightingService:
781
849
  rendered_image, legend, position=legend_position
782
850
  )
783
851
  else:
784
- final_image = rendered_image # No legend needed
852
+ final_image = rendered_image
785
853
  else:
786
854
  final_image = rendered_image
787
855
 
788
- except Exception as e:
789
- logger.error(f"Error rendering preview for page {page_index}: {e}", exc_info=True)
790
- return None
856
+ except IOError as e:
857
+ logger.error(f"IOError rendering preview for page {page_index}: {e}")
858
+ raise
859
+ except AttributeError as e:
860
+ logger.error(f"AttributeError rendering preview for page {page_index}: {e}")
861
+ raise
791
862
 
792
863
  return final_image