natural-pdf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. natural_pdf/__init__.py +55 -0
  2. natural_pdf/analyzers/__init__.py +6 -0
  3. natural_pdf/analyzers/layout/__init__.py +1 -0
  4. natural_pdf/analyzers/layout/base.py +151 -0
  5. natural_pdf/analyzers/layout/docling.py +247 -0
  6. natural_pdf/analyzers/layout/layout_analyzer.py +166 -0
  7. natural_pdf/analyzers/layout/layout_manager.py +200 -0
  8. natural_pdf/analyzers/layout/layout_options.py +78 -0
  9. natural_pdf/analyzers/layout/paddle.py +240 -0
  10. natural_pdf/analyzers/layout/surya.py +151 -0
  11. natural_pdf/analyzers/layout/tatr.py +251 -0
  12. natural_pdf/analyzers/layout/yolo.py +165 -0
  13. natural_pdf/analyzers/text_options.py +60 -0
  14. natural_pdf/analyzers/text_structure.py +270 -0
  15. natural_pdf/analyzers/utils.py +57 -0
  16. natural_pdf/core/__init__.py +3 -0
  17. natural_pdf/core/element_manager.py +457 -0
  18. natural_pdf/core/highlighting_service.py +698 -0
  19. natural_pdf/core/page.py +1444 -0
  20. natural_pdf/core/pdf.py +653 -0
  21. natural_pdf/elements/__init__.py +3 -0
  22. natural_pdf/elements/base.py +761 -0
  23. natural_pdf/elements/collections.py +1345 -0
  24. natural_pdf/elements/line.py +140 -0
  25. natural_pdf/elements/rect.py +122 -0
  26. natural_pdf/elements/region.py +1793 -0
  27. natural_pdf/elements/text.py +304 -0
  28. natural_pdf/ocr/__init__.py +56 -0
  29. natural_pdf/ocr/engine.py +104 -0
  30. natural_pdf/ocr/engine_easyocr.py +179 -0
  31. natural_pdf/ocr/engine_paddle.py +204 -0
  32. natural_pdf/ocr/engine_surya.py +171 -0
  33. natural_pdf/ocr/ocr_manager.py +191 -0
  34. natural_pdf/ocr/ocr_options.py +114 -0
  35. natural_pdf/qa/__init__.py +3 -0
  36. natural_pdf/qa/document_qa.py +396 -0
  37. natural_pdf/selectors/__init__.py +4 -0
  38. natural_pdf/selectors/parser.py +354 -0
  39. natural_pdf/templates/__init__.py +1 -0
  40. natural_pdf/templates/ocr_debug.html +517 -0
  41. natural_pdf/utils/__init__.py +3 -0
  42. natural_pdf/utils/highlighting.py +12 -0
  43. natural_pdf/utils/reading_order.py +227 -0
  44. natural_pdf/utils/visualization.py +223 -0
  45. natural_pdf/widgets/__init__.py +4 -0
  46. natural_pdf/widgets/frontend/viewer.js +88 -0
  47. natural_pdf/widgets/viewer.py +765 -0
  48. natural_pdf-0.1.0.dist-info/METADATA +295 -0
  49. natural_pdf-0.1.0.dist-info/RECORD +52 -0
  50. natural_pdf-0.1.0.dist-info/WHEEL +5 -0
  51. natural_pdf-0.1.0.dist-info/licenses/LICENSE +21 -0
  52. natural_pdf-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,304 @@
1
+ """
2
+ Text element classes for natural-pdf.
3
+ """
4
+ from typing import Dict, Any, Optional, TYPE_CHECKING
5
+
6
+ from natural_pdf.elements.base import Element
7
+
8
+ if TYPE_CHECKING:
9
+ from natural_pdf.core.page import Page
10
+
11
+
12
class TextElement(Element):
    """
    Represents a text element in a PDF.

    This class is a wrapper around pdfplumber's character objects,
    providing additional functionality for text extraction and analysis.

    Every property reads from ``self._obj``.  That attribute is presumably
    set by ``Element.__init__`` from the ``obj`` dict handed to the
    constructor (the base class is not visible here -- confirm).
    """

    def __init__(self, obj: Dict[str, Any], page: 'Page'):
        """
        Initialize a text element.

        Args:
            obj: The underlying pdfplumber object. For OCR text elements,
                 should include 'text', 'bbox', 'source', and 'confidence'.
                 NOTE: the dict is modified in place when it lacks an
                 'object_type' key.
            page: The parent Page object
        """
        # Add object_type if not present (mutates the caller's dict on purpose,
        # so the tag is visible to anything else holding the same dict).
        if 'object_type' not in obj:
            obj['object_type'] = 'text'

        super().__init__(obj, page)

    @property
    def text(self) -> str:
        """Get the text content ('' when the object has no 'text' key)."""
        return self._obj.get('text', '')

    @property
    def source(self) -> str:
        """Get the source of this text element (defaults to 'pdf')."""
        return self._obj.get('source', 'pdf')

    @property
    def confidence(self) -> float:
        """Get the confidence score; 1.0 when no 'confidence' key is stored."""
        return self._obj.get('confidence', 1.0)

    @property
    def fontname(self) -> str:
        """
        Get the font name.

        Lookup order: 'real_fontname', then 'fontname', then 'font';
        returns '' when none of them yields a truthy value.
        """
        # First check if we have a real fontname from PDF resources
        if 'real_fontname' in self._obj:
            return self._obj['real_fontname']
        # Otherwise use standard fontname
        return self._obj.get('fontname', '') or self._obj.get('font', '')

    @property
    def font_family(self) -> str:
        """
        Get a cleaner font family name by stripping PDF-specific prefixes.

        PDF font names often include prefixes like 'ABCDEF+' followed by the
        font name or unique identifiers. The prefix is dropped and, when the
        remainder contains one of a small list of well-known family names
        (case-insensitive), that family name is returned; otherwise the
        de-prefixed name is returned unchanged.
        """
        font = self.fontname

        # Remove common PDF font prefixes (e.g., 'ABCDEF+')
        if '+' in font:
            font = font.split('+', 1)[1]

        # Try to extract common font family names
        common_fonts = [
            'Arial', 'Helvetica', 'Times', 'Courier', 'Calibri',
            'Cambria', 'Georgia', 'Verdana', 'Tahoma', 'Trebuchet'
        ]

        lowered = font.lower()
        for common in common_fonts:
            if common.lower() in lowered:
                return common

        return font

    @property
    def font_variant(self) -> str:
        """
        Get the font variant identifier (prefix before the '+' in PDF font names).

        PDF embeds font subsets with unique identifiers like 'AAAAAB+FontName'.
        Different variants of the same base font will have different prefixes.
        This can be used to differentiate text that looks different despite
        having the same font name and size.

        Returns:
            The font variant prefix, or empty string if no variant is present
        """
        font = self.fontname

        # Extract the prefix before '+' if it exists
        if '+' in font:
            return font.split('+', 1)[0]

        return ""

    @property
    def size(self) -> float:
        """Get the font size (0 when the object has no 'size' key)."""
        return self._obj.get('size', 0)

    @property
    def color(self) -> tuple:
        """
        Get the text color as a 3-component (RGB-style) tuple.

        The stored 'non_stroking_color' value is interpreted by shape:

        * a bare number, or a 1-component list/tuple -> grayscale,
          replicated to three components;
        * a 3-component list/tuple -> returned as a tuple;
        * a 4-component list/tuple -> treated as CMYK and converted with the
          approximation ``1 - min(1, component + k)``;
        * anything else (including ``None``) -> black ``(0, 0, 0)``.
        """
        # PDFs often use non-RGB values, so we handle different formats
        color = self._obj.get('non_stroking_color', (0, 0, 0))

        # If it's a single value, treat as grayscale
        if isinstance(color, (int, float)):
            return (color, color, color)

        if isinstance(color, (list, tuple)):
            # Grayscale delivered as a one-component sequence, e.g. (0.5,)
            if len(color) == 1 and isinstance(color[0], (int, float)):
                gray = color[0]
                return (gray, gray, gray)

            # Three components: treat as RGB
            if len(color) == 3:
                return tuple(color)

            # Four components: treat as CMYK and convert to approximate RGB
            if len(color) == 4:
                c, m, y, k = color
                r = 1 - min(1, c + k)
                g = 1 - min(1, m + k)
                b = 1 - min(1, y + k)
                return (r, g, b)

        # Default to black
        return (0, 0, 0)

    def extract_text(self, keep_blank_chars=True, **kwargs) -> str:
        """
        Extract text from this element.

        Args:
            keep_blank_chars: Whether to keep blank characters (default: True).
                Ignored here; accepted for API consistency.
            **kwargs: Additional extraction parameters (ignored here)

        Returns:
            Text content
        """
        # For text elements, keep_blank_chars doesn't affect anything as we're
        # simply returning the text property. Included for API consistency.
        return self.text

    def contains(self, substring: str, case_sensitive: bool = True) -> bool:
        """
        Check if this text element contains a substring.

        Args:
            substring: The substring to check for
            case_sensitive: Whether the check is case-sensitive

        Returns:
            True if the text contains the substring
        """
        if case_sensitive:
            return substring in self.text
        return substring.lower() in self.text.lower()

    def matches(self, pattern: str) -> bool:
        """
        Check if this text element matches a regular expression pattern.

        The pattern is applied with ``re.search``, so it may match anywhere
        in the text, not only at the start.

        Args:
            pattern: Regular expression pattern

        Returns:
            True if the text matches the pattern
        """
        import re
        return bool(re.search(pattern, self.text))

    @property
    def bold(self) -> bool:
        """
        Check if the text is bold based on multiple indicators in the PDF.

        The checks, in order (first hit wins):
        1. Font name containing 'bold' or 'black', or ending in '-B'
        2. 'flags' value with mask 4 set
        3. 'stemv' / 'StemV' greater than 120
        4. 'weight' / 'FontWeight' of 700 or more
        5. 'render_mode' equal to 2 (fill and stroke)
        6. 'stroke_width' / 'lineWidth' greater than 0
        """
        # Check font name (original method)
        fontname = self.fontname.lower()
        if 'bold' in fontname or 'black' in fontname or self.fontname.endswith('-B'):
            return True

        # Check font flags if available.
        # NOTE(review): mask 4 is what the code tests; which flag scheme the
        # 'flags' value follows is not visible here -- confirm it means bold.
        flags = self._obj.get('flags')
        if flags is not None and (flags & 4) != 0:
            return True

        # Check StemV (vertical stem width) if available
        # Higher StemV values indicate bolder fonts
        stemv = self._obj.get('stemv') or self._obj.get('StemV')
        if stemv is not None and isinstance(stemv, (int, float)) and stemv > 120:
            return True

        # Check font weight if available (700+ is typically bold)
        weight = self._obj.get('weight') or self._obj.get('FontWeight')
        if weight is not None and isinstance(weight, (int, float)) and weight >= 700:
            return True

        # Check text rendering mode (mode 2 = fill and stroke, can make text appear bold)
        render_mode = self._obj.get('render_mode')
        if render_mode is not None and render_mode == 2:
            return True

        # Additional check: a positive stroke width is taken as a boldness hint
        stroke_width = self._obj.get('stroke_width') or self._obj.get('lineWidth')
        if stroke_width is not None and isinstance(stroke_width, (int, float)) and stroke_width > 0:
            return True

        return False

    @property
    def italic(self) -> bool:
        """
        Check if the text is italic based on multiple indicators in the PDF.

        The checks, in order (first hit wins):
        1. Font name containing 'italic' or 'oblique', or ending in '-I'
        2. 'flags' value with mask 64 set
        3. Non-zero 'italic_angle' / 'ItalicAngle'
        """
        # Check font name (original method)
        fontname = self.fontname.lower()
        if 'italic' in fontname or 'oblique' in fontname or self.fontname.endswith('-I'):
            return True

        # Check font flags if available (mask 64)
        flags = self._obj.get('flags')
        if flags is not None and (flags & 64) != 0:
            return True

        # Check italic angle if available
        # Non-zero italic angle indicates italic font
        italic_angle = self._obj.get('italic_angle') or self._obj.get('ItalicAngle')
        if italic_angle is not None and isinstance(italic_angle, (int, float)) and italic_angle != 0:
            return True

        return False

    def __repr__(self) -> str:
        """String representation of the text element (text is cut to 10 chars)."""
        preview = self.text[:10] + '...' if len(self.text) > 10 else self.text
        font_style = []
        if self.bold:
            font_style.append("bold")
        if self.italic:
            font_style.append("italic")
        style_str = f", style={font_style}" if font_style else ""

        # Use font_family for display but include raw fontname and variant
        font_display = self.font_family
        variant = self.font_variant
        variant_str = f", variant='{variant}'" if variant else ""

        # When the family was simplified from a subset-prefixed name, also
        # show the de-prefixed original in parentheses.
        if font_display != self.fontname and '+' in self.fontname:
            base_font = self.fontname.split('+', 1)[1]
            font_display = f"{font_display} ({base_font})"

        return f"<TextElement text='{preview}' font='{font_display}'{variant_str} size={self.size}{style_str} bbox={self.bbox}>"

    def font_info(self) -> dict:
        """
        Get detailed font information for this text element.

        Returns a dictionary with all available font-related properties,
        useful for debugging font detection issues. Raw values present on the
        underlying object are included under keys prefixed with ``raw_``.
        """
        info = {
            'text': self.text,
            'fontname': self.fontname,
            'font_family': self.font_family,
            'font_variant': self.font_variant,
            'size': self.size,
            'bold': self.bold,
            'italic': self.italic,
            'color': self.color
        }

        # Include raw font properties from the PDF
        font_props = [
            'flags', 'stemv', 'StemV', 'weight', 'FontWeight',
            'render_mode', 'stroke_width', 'lineWidth'
        ]

        for prop in font_props:
            if prop in self._obj:
                info[f"raw_{prop}"] = self._obj[prop]

        return info
@@ -0,0 +1,56 @@
1
+ """
2
+ OCR engines for natural-pdf.
3
+
4
+ This module provides different OCR engines that can be used with natural-pdf.
5
+ """
6
import logging

# Set up module logger
logger = logging.getLogger("natural_pdf.ocr")

from .ocr_manager import OCRManager
from .engine import OCREngine
from .ocr_options import OCROptions
# EasyOCREngine is listed in __all__ and used by get_engine(), so it must be
# importable from this package.
from .engine_easyocr import EasyOCREngine
from .engine_paddle import PaddleOCREngine
from .engine_surya import SuryaOCREngine

__all__ = ['OCRManager', 'OCREngine', 'OCROptions', 'EasyOCREngine', 'PaddleOCREngine', 'SuryaOCREngine']

# Engine class instantiated by get_engine() when no engine name is given.
DEFAULT_ENGINE = SuryaOCREngine
20
+
21
def get_engine(engine_name=None, **kwargs):
    """
    Get OCR engine by name.

    Args:
        engine_name: Name of the engine to use: 'easyocr', 'paddleocr' or
                    'surya' (case-insensitive). If None or 'default', the
                    class bound to DEFAULT_ENGINE is used.
        **kwargs: Additional arguments to pass to the engine constructor

    Returns:
        OCREngine instance

    Raises:
        ImportError: If the PaddleOCR engine cannot be imported or created
            because of a missing dependency.
        ValueError: If engine_name is not a recognised engine name.
    """
    logger.debug(f"Initializing OCR engine: {engine_name or 'default'}")

    if engine_name is None or engine_name == 'default':
        engine = DEFAULT_ENGINE(**kwargs)
        logger.info(f"Using default OCR engine: {engine.__class__.__name__}")
        return engine

    name = engine_name.lower()

    if name == 'easyocr':
        # Imported here so this function does not depend on the package's
        # top-level imports having bound the name.
        from .engine_easyocr import EasyOCREngine
        logger.info("Initializing EasyOCR engine")
        return EasyOCREngine(**kwargs)

    if name == 'paddleocr':
        try:
            from .engine_paddle import PaddleOCREngine
            logger.info("Initializing PaddleOCR engine")
            return PaddleOCREngine(**kwargs)
        except ImportError as exc:
            logger.error("PaddleOCR is not installed")
            raise ImportError(
                "PaddleOCR is not installed. Please install it with: pip install paddlepaddle paddleocr"
            ) from exc

    if name == 'surya':
        logger.info("Initializing Surya OCR engine")
        return SuryaOCREngine(**kwargs)

    logger.error(f"Unknown OCR engine: {engine_name}")
    raise ValueError(f"Unknown OCR engine: {engine_name}")
@@ -0,0 +1,104 @@
1
+ # ocr_engine_base.py
2
+ import logging
3
+ from abc import ABC, abstractmethod
4
+ from typing import Dict, List, Any, Optional, Tuple, Union
5
+ from PIL import Image
6
+
7
+ # Assuming ocr_options defines BaseOCROptions
8
+ from .ocr_options import BaseOCROptions
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class OCREngine(ABC):
    """
    Abstract Base Class for OCR engines.

    Subclasses implement ``process_image`` and ``is_available``. The base
    class supplies a per-class logger, a ``_reader_cache`` dict for
    initialized models/readers, a default cache-key builder and a bounding
    box normalizer.
    """

    def __init__(self):
        """Initializes the base OCR engine (logger and empty reader cache)."""
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        self.logger.info(f"Initializing {self.__class__.__name__}")
        self._reader_cache = {}  # Cache for initialized models/readers

    @abstractmethod
    def process_image(
        self,
        images: Union[Image.Image, List[Image.Image]],  # Accept single or list
        options: BaseOCROptions
    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:  # Return single or list of lists
        """
        Processes a single image or a batch of images using the specific engine and options.

        Args:
            images: A single PIL Image or a list of PIL Images.
            options: An instance of a dataclass inheriting from BaseOCROptions
                     containing configuration for this run.

        Returns:
            If input is a single image: List of result dictionaries.
            If input is a list of images: List of lists of result dictionaries,
                                          corresponding to each input image.
            An empty list indicates failure for that image.
        """
        raise NotImplementedError("Subclasses must implement this method")

    @abstractmethod
    def is_available(self) -> bool:
        """
        Check if the engine's dependencies are installed and usable.

        Returns:
            True if the engine is available, False otherwise.
        """
        raise NotImplementedError("Subclasses must implement this method")

    def _get_cache_key(self, options: BaseOCROptions) -> str:
        """
        Generates a cache key based on relevant options.
        Subclasses should override if more specific key generation is needed.

        Args:
            options: The options dataclass instance; ``languages`` and
                     ``device`` are read from it.

        Returns:
            A string cache key of the form ``<ClassName>_<langs>_<device>``,
            where languages are sorted and joined with '-' and the device is
            lower-cased.
        """
        # Basic key includes languages and device
        lang_key = "-".join(sorted(options.languages))
        device_key = str(options.device).lower()
        return f"{self.__class__.__name__}_{lang_key}_{device_key}"

    def _standardize_bbox(self, bbox: Any) -> Optional[Tuple[float, float, float, float]]:
        """
        Helper to standardize bounding boxes to (x0, y0, x1, y1) format.

        Args:
            bbox: The bounding box in the engine's native format.
                  Expected formats:
                  - List/Tuple of 4 numbers: (x0, y0, x1, y1)
                  - List of points: [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] (polygon)
                  Array-like objects exposing ``tolist()`` (for the box itself
                  or for its items) are converted to plain lists/numbers first.

        Returns:
            Tuple[float, float, float, float] or None if conversion fails or
            the format is not recognised. For polygons the result is the
            axis-aligned extent of all points.
        """
        try:
            # Normalize array-likes so the type checks below see plain Python
            # containers and numbers instead of silently rejecting them.
            if hasattr(bbox, 'tolist'):
                bbox = bbox.tolist()
            if isinstance(bbox, (list, tuple)):
                bbox = [item.tolist() if hasattr(item, 'tolist') else item for item in bbox]

                if len(bbox) == 4 and all(isinstance(n, (int, float)) for n in bbox):
                    # Already in (x0, y0, x1, y1) format (or similar)
                    return tuple(float(c) for c in bbox)

                if len(bbox) > 0 and isinstance(bbox[0], (list, tuple)):
                    # Polygon format [[x1,y1],[x2,y2],...]
                    x_coords = [float(point[0]) for point in bbox]
                    y_coords = [float(point[1]) for point in bbox]
                    return (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
        except Exception as e:
            self.logger.warning(f"Could not standardize bounding box: {bbox}. Error: {e}")
        return None

    def __del__(self):
        """Cleanup resources when the engine is deleted."""
        # __del__ also runs for objects whose __init__ never completed, so the
        # attributes set there may be missing; look them up defensively.
        log = getattr(self, 'logger', None)
        if log is not None:
            log.info(f"Cleaning up {self.__class__.__name__} resources.")
        # Clear reader cache to free up memory/GPU resources
        cache = getattr(self, '_reader_cache', None)
        if cache is not None:
            cache.clear()
104
+
@@ -0,0 +1,179 @@
1
+ # ocr_engine_easyocr.py
2
+ import logging
3
+ import importlib.util
4
+ from typing import Dict, List, Any, Optional, Tuple, Union
5
+ import numpy as np
6
+ from PIL import Image
7
+ import inspect # Used for dynamic parameter passing
8
+
9
+ from .engine import OCREngine
10
+ from .ocr_options import EasyOCROptions, BaseOCROptions
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class EasyOCREngine(OCREngine):
    """
    EasyOCR engine implementation.

    The ``easyocr`` module is imported lazily on first use, and readers are
    cached in ``self._reader_cache`` under a key derived from the options.
    """

    def __init__(self):
        """Initialize the base engine; the easyocr module is not loaded yet."""
        super().__init__()
        self._easyocr = None  # Lazy load easyocr module

    def _lazy_import_easyocr(self):
        """
        Imports easyocr only when needed.

        Returns:
            The imported ``easyocr`` module (cached on the instance).

        Raises:
            ImportError: If easyocr cannot be found or fails to import.
        """
        if self._easyocr is None:
            if not self.is_available():
                raise ImportError("EasyOCR is not installed or available.")
            try:
                import easyocr
                self._easyocr = easyocr
                logger.info("EasyOCR module imported successfully.")
            except ImportError as e:
                logger.error(f"Failed to import EasyOCR: {e}")
                raise
        return self._easyocr

    def is_available(self) -> bool:
        """Check if EasyOCR is installed (module spec lookup, no import)."""
        return importlib.util.find_spec("easyocr") is not None

    def _get_cache_key(self, options: EasyOCROptions) -> str:
        """
        Generate a more specific cache key for EasyOCR.

        Extends the base key with the recognition network, detection network
        and quantize setting read from ``options``.
        """
        base_key = super()._get_cache_key(options)
        recog_key = options.recog_network
        detect_key = options.detect_network
        quantize_key = str(options.quantize)
        return f"{base_key}_{recog_key}_{detect_key}_{quantize_key}"

    def _get_reader(self, options: EasyOCROptions):
        """
        Get or initialize an EasyOCR reader based on options.

        Constructor arguments are collected by walking the parameters of
        ``easyocr.Reader.__init__``: a parameter is filled from an attribute
        of ``options`` with the same name, or else from ``options.extra_args``.
        ``lang_list`` and ``gpu`` are always set explicitly.

        Raises:
            Exception: Whatever ``easyocr.Reader`` raises is logged and re-raised.
        """
        cache_key = self._get_cache_key(options)
        if cache_key in self._reader_cache:
            logger.debug(f"Using cached EasyOCR reader for key: {cache_key}")
            return self._reader_cache[cache_key]

        logger.info(f"Creating new EasyOCR reader for key: {cache_key}")
        easyocr = self._lazy_import_easyocr()

        constructor_sig = inspect.signature(easyocr.Reader.__init__)
        device_name = str(options.device).lower()
        constructor_args = {
            'lang_list': options.languages,
            # GPU is requested for any device string mentioning cuda or mps
            'gpu': 'cuda' in device_name or 'mps' in device_name,
        }

        for field_name in constructor_sig.parameters:
            if field_name in ('self', 'lang_list', 'gpu'):
                continue
            if hasattr(options, field_name):
                constructor_args[field_name] = getattr(options, field_name)
            elif field_name in options.extra_args:
                constructor_args[field_name] = options.extra_args[field_name]

        logger.debug(f"EasyOCR Reader constructor args: {constructor_args}")
        try:
            reader = easyocr.Reader(**constructor_args)
            self._reader_cache[cache_key] = reader
            logger.info("EasyOCR reader created successfully.")
            return reader
        except Exception as e:
            logger.error(f"Failed to create EasyOCR reader: {e}", exc_info=True)
            raise

    def _prepare_readtext_args(self, options: EasyOCROptions, reader) -> Dict[str, Any]:
        """
        Helper to prepare arguments for the readtext method.

        Every parameter of ``reader.readtext`` except ``image`` is filled from
        a same-named attribute of ``options`` or, failing that, from
        ``options.extra_args``; parameters found in neither are omitted.
        """
        readtext_sig = inspect.signature(reader.readtext)
        readtext_args = {}
        for field_name in readtext_sig.parameters:
            if field_name == 'image':
                continue
            if hasattr(options, field_name):
                readtext_args[field_name] = getattr(options, field_name)
            elif field_name in options.extra_args:
                readtext_args[field_name] = options.extra_args[field_name]
        logger.debug(f"EasyOCR readtext args: {readtext_args}")
        return readtext_args

    def _standardize_results(self, raw_results: List[Any], options: EasyOCROptions) -> List[Dict[str, Any]]:
        """
        Standardizes raw results from EasyOCR's readtext.

        With ``options.detail == 1`` each detection is read as
        ``(bbox, text, confidence, ...)``; entries below
        ``options.min_confidence`` or with an unusable bbox are dropped.
        With ``options.detail == 0`` each detection is a plain string and is
        stored with ``bbox=None`` and confidence 1.0. Detections in any other
        shape are skipped.

        Returns:
            List of dicts with keys 'bbox', 'text', 'confidence', 'source'.
        """
        standardized_results = []
        min_confidence = options.min_confidence

        for detection in raw_results:
            try:
                if options.detail == 1 and isinstance(detection, (list, tuple)) and len(detection) >= 3:
                    bbox_raw = detection[0]
                    text = str(detection[1])
                    confidence = float(detection[2])

                    if confidence >= min_confidence:
                        bbox = self._standardize_bbox(bbox_raw)
                        if bbox:
                            standardized_results.append({
                                'bbox': bbox, 'text': text, 'confidence': confidence, 'source': 'ocr'
                            })
                        else:
                            logger.warning(f"Skipping result due to invalid bbox: {bbox_raw}")

                elif options.detail == 0 and isinstance(detection, str):
                    standardized_results.append({
                        'bbox': None, 'text': detection, 'confidence': 1.0, 'source': 'ocr'
                    })
            except (IndexError, ValueError, TypeError) as e:
                logger.warning(f"Skipping invalid detection format: {detection}. Error: {e}")
                continue
        return standardized_results

    def _read_and_standardize(
        self,
        reader,
        img_array,
        readtext_args: Dict[str, Any],
        options: EasyOCROptions
    ) -> List[Dict[str, Any]]:
        """Run readtext on one image array and return the standardized results."""
        raw_results = reader.readtext(img_array, **readtext_args)
        return self._standardize_results(raw_results, options)

    def process_image(
        self,
        images: Union[Image.Image, List[Image.Image]],
        options: BaseOCROptions
    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
        """
        Processes a single image or a batch of images with EasyOCR.

        Args:
            images: A PIL Image, or a list of PIL Images (processed one by one).
            options: EasyOCROptions; any other options object is converted to
                     EasyOCROptions keeping languages, min_confidence, device
                     and extra_args.

        Returns:
            For a single image: list of result dicts ([] if OCR raised).
            For a list: one list per input item, [] for items that are not
            PIL Images or whose OCR raised.

        Raises:
            TypeError: If ``images`` is neither a PIL Image nor a list.
        """
        if not isinstance(options, EasyOCROptions):
            logger.warning("Received BaseOCROptions, expected EasyOCROptions. Using defaults.")
            # Create default EasyOCR options if base was passed, preserving base settings
            options = EasyOCROptions(
                languages=options.languages,
                min_confidence=options.min_confidence,
                device=options.device,
                extra_args=options.extra_args  # Pass along any extra args
            )

        reader = self._get_reader(options)
        readtext_args = self._prepare_readtext_args(options, reader)

        # --- Handle single image or batch ---
        if isinstance(images, list):
            # --- Batch Processing (Iterative for EasyOCR) ---
            all_results = []
            logger.info(f"Processing batch of {len(images)} images with EasyOCR (iteratively)...")
            for i, img in enumerate(images):
                if not isinstance(img, Image.Image):
                    logger.warning(f"Item at index {i} in batch is not a PIL Image. Skipping.")
                    all_results.append([])
                    continue
                img_array = np.array(img)
                try:
                    logger.debug(f"Processing image {i+1}/{len(images)} in batch.")
                    all_results.append(
                        self._read_and_standardize(reader, img_array, readtext_args, options)
                    )
                except Exception as e:
                    logger.error(f"Error processing image {i+1} in EasyOCR batch: {e}", exc_info=True)
                    all_results.append([])  # Append empty list for failed image
            logger.info("Finished processing batch with EasyOCR.")
            return all_results  # Return List[List[Dict]]

        if isinstance(images, Image.Image):
            # --- Single Image Processing ---
            logger.info("Processing single image with EasyOCR...")
            img_array = np.array(images)
            try:
                standardized = self._read_and_standardize(reader, img_array, readtext_args, options)
                logger.info(f"Finished processing single image. Found {len(standardized)} results.")
                return standardized  # Return List[Dict]
            except Exception as e:
                logger.error(f"Error processing single image with EasyOCR: {e}", exc_info=True)
                return []  # Return empty list on failure

        raise TypeError("Input 'images' must be a PIL Image or a list of PIL Images.")
179
+