natural-pdf 25.3.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. examples/__init__.py +3 -0
  2. examples/another_exclusion_example.py +20 -0
  3. examples/basic_usage.py +190 -0
  4. examples/boundary_exclusion_test.py +137 -0
  5. examples/boundary_inclusion_fix_test.py +157 -0
  6. examples/chainable_layout_example.py +70 -0
  7. examples/color_basic_test.py +49 -0
  8. examples/color_name_example.py +71 -0
  9. examples/color_test.py +62 -0
  10. examples/debug_ocr.py +91 -0
  11. examples/direct_ocr_test.py +148 -0
  12. examples/direct_paddle_test.py +99 -0
  13. examples/direct_qa_example.py +165 -0
  14. examples/document_layout_analysis.py +123 -0
  15. examples/document_qa_example.py +185 -0
  16. examples/exclusion_count_debug.py +128 -0
  17. examples/exclusion_debug.py +107 -0
  18. examples/exclusion_example.py +150 -0
  19. examples/exclusion_optimization_example.py +190 -0
  20. examples/extract_text_test.py +128 -0
  21. examples/font_aware_example.py +101 -0
  22. examples/font_variant_example.py +124 -0
  23. examples/footer_overlap_test.py +124 -0
  24. examples/highlight_all_example.py +82 -0
  25. examples/highlight_attributes_test.py +114 -0
  26. examples/highlight_confidence_display.py +122 -0
  27. examples/highlight_demo.py +110 -0
  28. examples/highlight_float_test.py +71 -0
  29. examples/highlight_test.py +147 -0
  30. examples/highlighting_example.py +123 -0
  31. examples/image_width_example.py +84 -0
  32. examples/improved_api_example.py +128 -0
  33. examples/layout_confidence_display_test.py +65 -0
  34. examples/layout_confidence_test.py +82 -0
  35. examples/layout_coordinate_debug.py +258 -0
  36. examples/layout_highlight_test.py +77 -0
  37. examples/logging_example.py +70 -0
  38. examples/ocr_comprehensive.py +193 -0
  39. examples/ocr_debug_example.py +87 -0
  40. examples/ocr_default_test.py +97 -0
  41. examples/ocr_engine_comparison.py +235 -0
  42. examples/ocr_example.py +89 -0
  43. examples/ocr_simplified_params.py +79 -0
  44. examples/ocr_visualization.py +102 -0
  45. examples/ocr_visualization_test.py +121 -0
  46. examples/paddle_layout_example.py +315 -0
  47. examples/paddle_layout_simple.py +74 -0
  48. examples/paddleocr_example.py +224 -0
  49. examples/page_collection_example.py +103 -0
  50. examples/polygon_highlight_example.py +83 -0
  51. examples/position_methods_example.py +134 -0
  52. examples/region_boundary_test.py +73 -0
  53. examples/region_exclusion_test.py +149 -0
  54. examples/region_expand_example.py +109 -0
  55. examples/region_image_example.py +116 -0
  56. examples/region_ocr_test.py +119 -0
  57. examples/region_sections_example.py +115 -0
  58. examples/school_books.py +49 -0
  59. examples/school_books_all.py +52 -0
  60. examples/scouring.py +36 -0
  61. examples/section_extraction_example.py +232 -0
  62. examples/simple_document_qa.py +97 -0
  63. examples/spatial_navigation_example.py +108 -0
  64. examples/table_extraction_example.py +135 -0
  65. examples/table_structure_detection.py +155 -0
  66. examples/tatr_cells_test.py +56 -0
  67. examples/tatr_ocr_table_test.py +94 -0
  68. examples/text_search_example.py +122 -0
  69. examples/text_style_example.py +110 -0
  70. examples/tiny-text.py +61 -0
  71. examples/until_boundaries_example.py +156 -0
  72. examples/until_example.py +112 -0
  73. examples/very_basics.py +15 -0
  74. natural_pdf/__init__.py +55 -0
  75. natural_pdf/analyzers/__init__.py +9 -0
  76. natural_pdf/analyzers/document_layout.py +736 -0
  77. natural_pdf/analyzers/text_structure.py +153 -0
  78. natural_pdf/core/__init__.py +3 -0
  79. natural_pdf/core/page.py +2376 -0
  80. natural_pdf/core/pdf.py +572 -0
  81. natural_pdf/elements/__init__.py +3 -0
  82. natural_pdf/elements/base.py +553 -0
  83. natural_pdf/elements/collections.py +770 -0
  84. natural_pdf/elements/line.py +124 -0
  85. natural_pdf/elements/rect.py +122 -0
  86. natural_pdf/elements/region.py +1366 -0
  87. natural_pdf/elements/text.py +304 -0
  88. natural_pdf/ocr/__init__.py +62 -0
  89. natural_pdf/ocr/easyocr_engine.py +254 -0
  90. natural_pdf/ocr/engine.py +158 -0
  91. natural_pdf/ocr/paddleocr_engine.py +263 -0
  92. natural_pdf/qa/__init__.py +3 -0
  93. natural_pdf/qa/document_qa.py +405 -0
  94. natural_pdf/selectors/__init__.py +4 -0
  95. natural_pdf/selectors/parser.py +360 -0
  96. natural_pdf/templates/__init__.py +1 -0
  97. natural_pdf/templates/ocr_debug.html +517 -0
  98. natural_pdf/utils/__init__.py +4 -0
  99. natural_pdf/utils/highlighting.py +605 -0
  100. natural_pdf/utils/ocr.py +515 -0
  101. natural_pdf/utils/reading_order.py +227 -0
  102. natural_pdf/utils/visualization.py +151 -0
  103. natural_pdf-25.3.16.dist-info/LICENSE +21 -0
  104. natural_pdf-25.3.16.dist-info/METADATA +268 -0
  105. natural_pdf-25.3.16.dist-info/RECORD +109 -0
  106. natural_pdf-25.3.16.dist-info/WHEEL +5 -0
  107. natural_pdf-25.3.16.dist-info/top_level.txt +3 -0
  108. tests/__init__.py +3 -0
  109. tests/test_pdf.py +39 -0
@@ -0,0 +1,304 @@
1
+ """
2
+ Text element classes for natural-pdf.
3
+ """
4
+ from typing import Dict, Any, Optional, TYPE_CHECKING
5
+
6
+ from natural_pdf.elements.base import Element
7
+
8
+ if TYPE_CHECKING:
9
+ from natural_pdf.core.page import Page
10
+
11
+
12
class TextElement(Element):
    """
    Represents a text element in a PDF.

    This class is a wrapper around pdfplumber's character objects,
    providing additional functionality for text extraction and analysis.
    All properties read from the underlying object dictionary (``self._obj``).
    """

    # Family names that ``font_family`` collapses a raw font name to.
    _COMMON_FONTS = (
        'Arial', 'Helvetica', 'Times', 'Courier', 'Calibri',
        'Cambria', 'Georgia', 'Verdana', 'Tahoma', 'Trebuchet',
    )

    # PDF font descriptor flag values (ISO 32000-1, Table 123).
    # Bit positions in the spec are 1-based: Italic is bit 7, ForceBold bit 19.
    _FLAG_ITALIC = 1 << 6        # 64
    _FLAG_FORCE_BOLD = 1 << 18   # 262144

    def __init__(self, obj: Dict[str, Any], page: 'Page'):
        """
        Initialize a text element.

        Args:
            obj: The underlying pdfplumber object. For OCR text elements,
                should include 'text', 'bbox', 'source', and 'confidence'.
                The dictionary is updated in place with a default
                'object_type' of 'text' when that key is missing.
            page: The parent Page object
        """
        # Add object_type if not present
        obj.setdefault('object_type', 'text')

        super().__init__(obj, page)

    @property
    def text(self) -> str:
        """Get the text content (empty string when absent)."""
        return self._obj.get('text', '')

    @property
    def source(self) -> str:
        """Get the source of this text element (pdf or ocr); defaults to 'pdf'."""
        return self._obj.get('source', 'pdf')

    @property
    def confidence(self) -> float:
        """Get the confidence score for OCR text elements; defaults to 1.0."""
        return self._obj.get('confidence', 1.0)

    @property
    def fontname(self) -> str:
        """Get the font name."""
        # First check if we have a real fontname from PDF resources
        if 'real_fontname' in self._obj:
            return self._obj['real_fontname']
        # Otherwise use standard fontname, falling back to the 'font' key
        return self._obj.get('fontname', '') or self._obj.get('font', '')

    @property
    def font_family(self) -> str:
        """
        Get a cleaner font family name by stripping PDF-specific prefixes.

        PDF font names often include prefixes like 'ABCDEF+' followed by the font name
        or unique identifiers. This method attempts to extract a more readable font name.

        Returns:
            The first well-known family name found in the font name
            (case-insensitive), otherwise the font name with any 'PREFIX+'
            removed.
        """
        font = self.fontname

        # Remove common PDF font prefixes (e.g., 'ABCDEF+')
        if '+' in font:
            font = font.split('+', 1)[1]

        # Try to extract common font family names
        lowered = font.lower()
        for common in self._COMMON_FONTS:
            if common.lower() in lowered:
                return common

        return font

    @property
    def font_variant(self) -> str:
        """
        Get the font variant identifier (prefix before the '+' in PDF font names).

        PDF embeds font subsets with unique identifiers like 'AAAAAB+FontName'.
        Different variants of the same base font will have different prefixes.
        This can be used to differentiate text that looks different despite
        having the same font name and size.

        Returns:
            The font variant prefix, or empty string if no variant is present
        """
        font = self.fontname

        # Extract the prefix before '+' if it exists
        if '+' in font:
            return font.split('+', 1)[0]

        return ""

    @property
    def size(self) -> float:
        """Get the font size (0 when absent)."""
        return self._obj.get('size', 0)

    @property
    def color(self) -> tuple:
        """
        Get the text color (RGB tuple).

        PDFs often use non-RGB values, so several shapes of the
        'non_stroking_color' entry are handled:

        * a bare number, or a one-element sequence -> grayscale
        * a three-element sequence -> RGB, returned as a tuple
        * a four-element sequence -> CMYK, converted to approximate RGB

        Both lists and tuples are accepted. Anything else (including a
        missing or None value) yields black.
        """
        color = self._obj.get('non_stroking_color', (0, 0, 0))

        # If it's a single value, treat as grayscale
        if isinstance(color, (int, float)):
            return (color, color, color)

        if isinstance(color, (list, tuple)):
            # One-component colors are grayscale wrapped in a sequence
            if len(color) == 1 and isinstance(color[0], (int, float)):
                gray = color[0]
                return (gray, gray, gray)

            # Three values: treat as RGB
            if len(color) == 3:
                return tuple(color)

            # Four values: treat as CMYK and convert to approximate RGB
            if len(color) == 4:
                c, m, y, k = color
                r = 1 - min(1, c + k)
                g = 1 - min(1, m + k)
                b = 1 - min(1, y + k)
                return (r, g, b)

        # Default to black
        return (0, 0, 0)

    def extract_text(self, keep_blank_chars=True, **kwargs) -> str:
        """
        Extract text from this element.

        Args:
            keep_blank_chars: Whether to keep blank characters (default: True)
            **kwargs: Additional extraction parameters

        Returns:
            Text content
        """
        # For text elements, keep_blank_chars doesn't affect anything as we're
        # simply returning the text property. Included for API consistency.
        return self.text

    def contains(self, substring: str, case_sensitive: bool = True) -> bool:
        """
        Check if this text element contains a substring.

        Args:
            substring: The substring to check for
            case_sensitive: Whether the check is case-sensitive

        Returns:
            True if the text contains the substring
        """
        if case_sensitive:
            return substring in self.text
        return substring.lower() in self.text.lower()

    def matches(self, pattern: str) -> bool:
        """
        Check if this text element matches a regular expression pattern.

        The pattern is searched anywhere in the text (``re.search``), it does
        not have to match from the start.

        Args:
            pattern: Regular expression pattern

        Returns:
            True if the text matches the pattern
        """
        import re
        return bool(re.search(pattern, self.text))

    @property
    def bold(self) -> bool:
        """
        Check if the text is bold based on multiple indicators in the PDF.

        PDFs encode boldness in several ways:
        1. Font name containing 'bold' or 'black' (or ending in '-B')
        2. Font descriptor flags (ForceBold, bit 19 / value 262144)
        3. StemV value (thickness of vertical stems)
        4. Font weight values (700+ is typically bold)
        5. Text rendering mode 2 (fill and stroke)
        6. A positive stroke width
        """
        # Check font name
        fontname = self.fontname.lower()
        if 'bold' in fontname or 'black' in fontname or self.fontname.endswith('-B'):
            return True

        # Check font descriptor flags if available. Value 4 is the Symbolic
        # flag, not a boldness indicator; ForceBold is the only bold-related
        # descriptor flag.
        flags = self._obj.get('flags')
        if isinstance(flags, int) and flags & self._FLAG_FORCE_BOLD:
            return True

        # Check StemV (vertical stem width) if available
        # Higher StemV values indicate bolder fonts
        stemv = self._obj.get('stemv') or self._obj.get('StemV')
        if isinstance(stemv, (int, float)) and stemv > 120:
            return True

        # Check font weight if available (700+ is typically bold)
        weight = self._obj.get('weight') or self._obj.get('FontWeight')
        if isinstance(weight, (int, float)) and weight >= 700:
            return True

        # Check text rendering mode (mode 2 = fill and stroke, can make text appear bold)
        if self._obj.get('render_mode') == 2:
            return True

        # Path widths or stroke widths can indicate boldness
        stroke_width = self._obj.get('stroke_width') or self._obj.get('lineWidth')
        if isinstance(stroke_width, (int, float)) and stroke_width > 0:
            return True

        return False

    @property
    def italic(self) -> bool:
        """
        Check if the text is italic based on multiple indicators in the PDF.

        PDFs encode italic (oblique) text in several ways:
        1. Font name containing 'italic' or 'oblique' (or ending in '-I')
        2. Font descriptor flags (Italic, bit 7 / value 64)
        3. Text with non-zero slant angle
        """
        # Check font name
        fontname = self.fontname.lower()
        if 'italic' in fontname or 'oblique' in fontname or self.fontname.endswith('-I'):
            return True

        # Check font descriptor flags if available
        flags = self._obj.get('flags')
        if isinstance(flags, int) and flags & self._FLAG_ITALIC:
            return True

        # Check italic angle if available
        # Non-zero italic angle indicates italic font
        italic_angle = self._obj.get('italic_angle') or self._obj.get('ItalicAngle')
        if isinstance(italic_angle, (int, float)) and italic_angle != 0:
            return True

        return False

    def __repr__(self) -> str:
        """String representation of the text element."""
        # Show at most the first 10 characters of the text
        preview = self.text[:10] + '...' if len(self.text) > 10 else self.text
        font_style = []
        if self.bold:
            font_style.append("bold")
        if self.italic:
            font_style.append("italic")
        style_str = f", style={font_style}" if font_style else ""

        # Use font_family for display but include raw fontname and variant
        font_display = self.font_family
        variant = self.font_variant
        variant_str = f", variant='{variant}'" if variant else ""

        if font_display != self.fontname and '+' in self.fontname:
            base_font = self.fontname.split('+', 1)[1]
            font_display = f"{font_display} ({base_font})"

        return f"<TextElement text='{preview}' font='{font_display}'{variant_str} size={self.size}{style_str} bbox={self.bbox}>"

    def font_info(self) -> dict:
        """
        Get detailed font information for this text element.

        Returns a dictionary with all available font-related properties,
        useful for debugging font detection issues. Raw values found on the
        underlying object are included under ``raw_<name>`` keys.
        """
        info = {
            'text': self.text,
            'fontname': self.fontname,
            'font_family': self.font_family,
            'font_variant': self.font_variant,
            'size': self.size,
            'bold': self.bold,
            'italic': self.italic,
            'color': self.color
        }

        # Include raw font properties from the PDF
        font_props = [
            'flags', 'stemv', 'StemV', 'weight', 'FontWeight',
            'render_mode', 'stroke_width', 'lineWidth'
        ]

        for prop in font_props:
            if prop in self._obj:
                info[f"raw_{prop}"] = self._obj[prop]

        return info
@@ -0,0 +1,62 @@
1
+ """
2
+ OCR engines for natural-pdf.
3
+
4
+ This module provides different OCR engines that can be used with natural-pdf.
5
+ """
6
+ import logging
7
+
8
+ # Set up module logger
9
+ logger = logging.getLogger("natural_pdf.ocr")
10
+ from .engine import OCREngine
11
+ from .easyocr_engine import EasyOCREngine
12
+
13
# PaddleOCR is an optional dependency: attempt the import once and derive both
# the public export list and the default engine from whether it succeeded.
try:
    from .paddleocr_engine import PaddleOCREngine
except ImportError:
    __all__ = ['OCREngine', 'EasyOCREngine']
    # Default engine to use if none is specified
    DEFAULT_ENGINE = EasyOCREngine  # Fall back to EasyOCR if PaddleOCR is not available
else:
    __all__ = ['OCREngine', 'EasyOCREngine', 'PaddleOCREngine']
    # Default engine to use if none is specified
    DEFAULT_ENGINE = PaddleOCREngine  # Use PaddleOCR as default if available
26
+
27
def get_engine(engine_name=None, **kwargs):
    """
    Get OCR engine by name.

    Engine names are matched case-insensitively.

    Args:
        engine_name: Name of the engine to use ('easyocr', 'paddleocr', etc.)
            If None or 'default', the default engine is used (PaddleOCR if
            available, otherwise EasyOCR)
        **kwargs: Additional arguments to pass to the engine constructor

    Returns:
        OCREngine instance

    Raises:
        ImportError: If 'paddleocr' is requested but cannot be imported.
        ValueError: If engine_name is not a recognised engine name.
    """
    logger.debug(f"Initializing OCR engine: {engine_name or 'default'}")

    # Normalise once so every comparison below is case-insensitive; a
    # non-string name falls through to the ValueError at the end.
    name = engine_name.lower() if isinstance(engine_name, str) else engine_name

    if name is None or name == 'default':
        engine = DEFAULT_ENGINE(**kwargs)
        logger.info(f"Using default OCR engine: {engine.__class__.__name__}")
        return engine

    if name == 'easyocr':
        logger.info("Initializing EasyOCR engine")
        return EasyOCREngine(**kwargs)

    if name == 'paddleocr':
        try:
            from .paddleocr_engine import PaddleOCREngine
            logger.info("Initializing PaddleOCR engine")
            return PaddleOCREngine(**kwargs)
        except ImportError as err:
            logger.error("PaddleOCR is not installed")
            # Chain the original error so the missing module stays visible
            raise ImportError(
                "PaddleOCR is not installed. Please install it with: pip install paddlepaddle paddleocr"
            ) from err

    logger.error(f"Unknown OCR engine: {engine_name}")
    raise ValueError(f"Unknown OCR engine: {engine_name}")
@@ -0,0 +1,254 @@
1
+ """
2
+ EasyOCR engine implementation.
3
+ """
4
+ import importlib.util
5
+ from typing import Dict, List, Any, Optional
6
+ import numpy as np
7
+ from PIL import Image
8
+
9
+ from .engine import OCREngine
10
+
11
+
12
class EasyOCREngine(OCREngine):
    """EasyOCR implementation."""

    def __init__(self, **kwargs):
        """
        Initialize EasyOCR engine with optional settings.

        Args:
            **kwargs: Engine-specific settings. They are forwarded to the base
                class and also kept as the starting keyword arguments for
                every ``easyocr.Reader`` this engine creates.
        """
        super().__init__(**kwargs)
        self._readers = {}  # Cache for readers, keyed by language list

        # Store initialization settings to use in model initialization
        self._init_settings = kwargs

    @staticmethod
    def _log():
        """Return the logger used for this engine's diagnostics."""
        # Imported locally so the block does not depend on a module-level import.
        import logging
        return logging.getLogger("natural_pdf.ocr.easyocr")

    def is_available(self) -> bool:
        """
        Check if EasyOCR is installed.

        Returns:
            True if EasyOCR is available, False otherwise
        """
        try:
            import easyocr
            return True
        except ImportError:
            return False

    def get_reader(self, config: Dict[str, Any]):
        """
        Get or initialize an EasyOCR reader based on configuration.

        Readers are cached per language list; a cached reader is returned
        regardless of the other settings in ``config``.

        Args:
            config: OCR configuration. Reads 'languages' (default ['en']),
                'device' (mapped to EasyOCR's ``gpu`` flag) and
                'model_settings' (merged into the Reader keyword arguments).

        Returns:
            EasyOCR reader instance

        Raises:
            ImportError: If EasyOCR is not installed.
        """
        log = self._log()

        # Get languages from config
        languages = config.get("languages", ["en"])

        # Create a cache key from languages
        cache_key = f"easyocr_{'-'.join(languages)}"

        # Return cached reader if available
        if cache_key in self._readers:
            log.debug("Using cached EasyOCR reader %s", cache_key)
            return self._readers[cache_key]

        # Check if easyocr is installed
        if not importlib.util.find_spec("easyocr"):
            raise ImportError(
                "EasyOCR is not installed. Please install it with: pip install easyocr"
            )

        import easyocr

        # Start with initialization settings
        reader_kwargs = self._init_settings.copy()

        # Add languages parameter
        reader_kwargs["lang_list"] = languages

        # Handle device parameter mapping: any 'cuda...' device enables the GPU
        if "device" in config:
            device = config["device"]
            reader_kwargs["gpu"] = isinstance(device, str) and device.startswith("cuda")

        # Apply model_settings if provided
        reader_kwargs.update(config.get("model_settings") or {})
        log.debug("Creating easyocr.Reader with %s", reader_kwargs)

        # Create reader with specified settings
        try:
            reader = easyocr.Reader(**reader_kwargs)
        except Exception:
            log.exception("Error creating EasyOCR reader")
            raise

        # Cache reader
        self._readers[cache_key] = reader
        return reader

    def process_image(self, image: Image.Image, config: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """
        Process an image with EasyOCR.

        Args:
            image: PIL Image to process (anything else is passed to EasyOCR
                unchanged)
            config: OCR configuration; normalized with ``normalize_config``

        Returns:
            List of standardized OCR results, each a dict with 'bbox'
            (x0, y0, x1, y1), 'text', 'confidence' and 'source' ('ocr').
            An empty list is returned when OCR is disabled in the config or
            when EasyOCR raises while reading the image.
        """
        log = self._log()

        # Normalize config
        if config is None:
            config = {}
        config = self.normalize_config(config)

        # Skip if OCR is disabled
        if not config.get("enabled"):
            log.debug("OCR is disabled in config, returning empty list")
            return []

        # Get reader
        reader = self.get_reader(config)

        # Convert PIL Image to numpy array if needed
        img_array = np.array(image) if isinstance(image, Image.Image) else image

        # model_settings take precedence; the nested detection/recognition
        # parameter groups are kept for backward compatibility and only fill
        # in keys that are not already set.
        readtext_kwargs = dict(config.get("model_settings") or {})
        for group in ("detection_params", "recognition_params"):
            for key, value in (config.get(group) or {}).items():
                readtext_kwargs.setdefault(key, value)

        # Run OCR with all parameters
        log.debug("Running EasyOCR with parameters: %s", readtext_kwargs)
        try:
            result = reader.readtext(img_array, **readtext_kwargs)
        except Exception:
            # Best effort: a failed OCR pass yields no results rather than an error
            log.exception("EasyOCR error")
            return []
        log.debug("EasyOCR returned %d results", len(result))

        # Apply minimum confidence threshold
        min_confidence = config.get("min_confidence", 0.5)

        # Convert to standardized format
        standardized_results = []

        for detection in result:
            # Detailed format (detail=1): (bbox, text, confidence). EasyOCR
            # yields these as tuples, so accept both tuples and lists.
            if isinstance(detection, (list, tuple)) and len(detection) >= 3:
                bbox = detection[0]  # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
                text = detection[1]
                confidence = detection[2]

                # Skip if confidence is below threshold
                if confidence < min_confidence:
                    continue

                # Convert polygon bbox to rectangle (x0, y0, x1, y1)
                x_coords = [point[0] for point in bbox]
                y_coords = [point[1] for point in bbox]

                standardized_results.append({
                    'bbox': (min(x_coords), min(y_coords), max(x_coords), max(y_coords)),
                    'text': text,
                    'confidence': confidence,
                    'source': 'ocr'
                })
            elif isinstance(detection, str):
                # Simple format (detail=0), no bbox or confidence
                standardized_results.append({
                    'bbox': (0, 0, 1, 1),  # Dummy bbox
                    'text': detection,
                    'confidence': 1.0,  # Default confidence
                    'source': 'ocr'
                })

        return standardized_results

    def __del__(self):
        """Cleanup resources when the engine is deleted."""
        # Clear reader cache to free up memory. The attribute may be missing
        # if __init__ raised before it was assigned.
        readers = getattr(self, "_readers", None)
        if readers is not None:
            readers.clear()