natural-pdf 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/ocr/index.md +34 -47
- docs/tutorials/01-loading-and-extraction.ipynb +60 -46
- docs/tutorials/02-finding-elements.ipynb +42 -42
- docs/tutorials/03-extracting-blocks.ipynb +17 -17
- docs/tutorials/04-table-extraction.ipynb +12 -12
- docs/tutorials/05-excluding-content.ipynb +30 -30
- docs/tutorials/06-document-qa.ipynb +28 -28
- docs/tutorials/07-layout-analysis.ipynb +63 -35
- docs/tutorials/07-working-with-regions.ipynb +55 -51
- docs/tutorials/07-working-with-regions.md +2 -2
- docs/tutorials/08-spatial-navigation.ipynb +60 -60
- docs/tutorials/09-section-extraction.ipynb +113 -113
- docs/tutorials/10-form-field-extraction.ipynb +78 -50
- docs/tutorials/11-enhanced-table-processing.ipynb +6 -6
- docs/tutorials/12-ocr-integration.ipynb +149 -131
- docs/tutorials/12-ocr-integration.md +0 -13
- docs/tutorials/13-semantic-search.ipynb +313 -873
- natural_pdf/__init__.py +21 -23
- natural_pdf/analyzers/layout/gemini.py +264 -0
- natural_pdf/analyzers/layout/layout_manager.py +28 -1
- natural_pdf/analyzers/layout/layout_options.py +11 -0
- natural_pdf/analyzers/layout/yolo.py +6 -2
- natural_pdf/collections/pdf_collection.py +21 -0
- natural_pdf/core/element_manager.py +16 -13
- natural_pdf/core/page.py +165 -36
- natural_pdf/core/pdf.py +146 -41
- natural_pdf/elements/base.py +11 -17
- natural_pdf/elements/collections.py +100 -38
- natural_pdf/elements/region.py +77 -38
- natural_pdf/elements/text.py +5 -0
- natural_pdf/ocr/__init__.py +49 -36
- natural_pdf/ocr/engine.py +146 -51
- natural_pdf/ocr/engine_easyocr.py +141 -161
- natural_pdf/ocr/engine_paddle.py +107 -193
- natural_pdf/ocr/engine_surya.py +75 -148
- natural_pdf/ocr/ocr_factory.py +114 -0
- natural_pdf/ocr/ocr_manager.py +65 -93
- natural_pdf/ocr/ocr_options.py +7 -17
- natural_pdf/ocr/utils.py +98 -0
- natural_pdf/templates/spa/css/style.css +334 -0
- natural_pdf/templates/spa/index.html +31 -0
- natural_pdf/templates/spa/js/app.js +472 -0
- natural_pdf/templates/spa/words.txt +235976 -0
- natural_pdf/utils/debug.py +32 -0
- natural_pdf/utils/identifiers.py +29 -0
- natural_pdf/utils/packaging.py +418 -0
- {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +41 -19
- {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/RECORD +51 -44
- {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
- {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/top_level.txt +0 -1
- natural_pdf/templates/ocr_debug.html +0 -517
- tests/test_loading.py +0 -50
- tests/test_optional_deps.py +0 -298
- {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
natural_pdf/ocr/engine.py
CHANGED
@@ -11,35 +11,137 @@ from .ocr_options import BaseOCROptions
|
|
11
11
|
logger = logging.getLogger(__name__)
|
12
12
|
|
13
13
|
|
14
|
+
class TextRegion:
|
15
|
+
"""Standard representation of an OCR text region."""
|
16
|
+
|
17
|
+
def __init__(self, bbox: Tuple[float, float, float, float], text: str, confidence: float, source: str = "ocr"):
|
18
|
+
"""
|
19
|
+
Initialize a text region.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
bbox: Tuple of (x0, y0, x1, y1) coordinates
|
23
|
+
text: The recognized text
|
24
|
+
confidence: Confidence score (0.0-1.0)
|
25
|
+
source: Source of the text region (default: "ocr")
|
26
|
+
"""
|
27
|
+
self.bbox = bbox
|
28
|
+
self.text = text
|
29
|
+
self.confidence = confidence
|
30
|
+
self.source = source
|
31
|
+
|
32
|
+
@classmethod
|
33
|
+
def from_polygon(cls, polygon: List[List[float]], text: str, confidence: float):
|
34
|
+
"""Create from polygon coordinates [[x1,y1], [x2,y2], ...]"""
|
35
|
+
x_coords = [float(point[0]) for point in polygon]
|
36
|
+
y_coords = [float(point[1]) for point in polygon]
|
37
|
+
bbox = (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
|
38
|
+
return cls(bbox, text, confidence)
|
39
|
+
|
40
|
+
def to_dict(self) -> Dict[str, Any]:
|
41
|
+
"""Convert to dictionary representation for compatibility."""
|
42
|
+
return {
|
43
|
+
"bbox": self.bbox,
|
44
|
+
"text": self.text,
|
45
|
+
"confidence": self.confidence,
|
46
|
+
"source": self.source
|
47
|
+
}
|
48
|
+
|
49
|
+
|
14
50
|
class OCREngine(ABC):
|
15
51
|
"""Abstract Base Class for OCR engines."""
|
52
|
+
|
53
|
+
# Default values as class constants
|
54
|
+
DEFAULT_MIN_CONFIDENCE = 0.2
|
55
|
+
DEFAULT_LANGUAGES = ['en']
|
56
|
+
DEFAULT_DEVICE = 'cpu'
|
16
57
|
|
17
58
|
def __init__(self):
|
18
59
|
"""Initializes the base OCR engine."""
|
19
60
|
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
20
61
|
self.logger.info(f"Initializing {self.__class__.__name__}")
|
62
|
+
self._model = None
|
63
|
+
self._initialized = False
|
21
64
|
self._reader_cache = {} # Cache for initialized models/readers
|
22
65
|
|
23
|
-
@abstractmethod
|
24
66
|
def process_image(
|
25
67
|
self,
|
26
|
-
images: Union[Image.Image, List[Image.Image]],
|
27
|
-
|
28
|
-
|
68
|
+
images: Union[Image.Image, List[Image.Image]],
|
69
|
+
languages: Optional[List[str]] = None,
|
70
|
+
min_confidence: Optional[float] = None,
|
71
|
+
device: Optional[str] = None,
|
72
|
+
detect_only: bool = False,
|
73
|
+
options: Optional[BaseOCROptions] = None,
|
74
|
+
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
|
29
75
|
"""
|
30
|
-
|
31
|
-
|
76
|
+
Process a single image or batch of images with OCR.
|
77
|
+
|
32
78
|
Args:
|
33
|
-
images: A single PIL Image or a list of PIL Images
|
34
|
-
|
35
|
-
|
36
|
-
|
79
|
+
images: A single PIL Image or a list of PIL Images
|
80
|
+
languages: List of languages to use (default: ['en'])
|
81
|
+
min_confidence: Minimum confidence threshold (default: 0.2)
|
82
|
+
device: Device to use for processing (default: 'cpu')
|
83
|
+
detect_only: Whether to only detect text regions without recognition
|
84
|
+
options: Engine-specific options
|
85
|
+
|
37
86
|
Returns:
|
38
|
-
|
39
|
-
|
40
|
-
corresponding to each input image.
|
41
|
-
An empty list indicates failure for that image.
|
87
|
+
For a single image: List of text region dictionaries
|
88
|
+
For a batch: List of lists of text region dictionaries
|
42
89
|
"""
|
90
|
+
# Convert single image to batch format
|
91
|
+
single_image = not isinstance(images, list)
|
92
|
+
image_batch = [images] if single_image else images
|
93
|
+
|
94
|
+
# Use default values where parameters are not provided
|
95
|
+
effective_languages = languages or self.DEFAULT_LANGUAGES
|
96
|
+
effective_confidence = min_confidence if min_confidence is not None else self.DEFAULT_MIN_CONFIDENCE
|
97
|
+
effective_device = device or self.DEFAULT_DEVICE
|
98
|
+
|
99
|
+
# Ensure the model is initialized
|
100
|
+
self._ensure_initialized(effective_languages, effective_device, options)
|
101
|
+
|
102
|
+
# Process each image in the batch
|
103
|
+
results = []
|
104
|
+
for img in image_batch:
|
105
|
+
# Preprocess the image for the specific engine
|
106
|
+
processed_img = self._preprocess_image(img)
|
107
|
+
|
108
|
+
# Process the image with the engine-specific implementation
|
109
|
+
raw_results = self._process_single_image(processed_img, detect_only, options)
|
110
|
+
|
111
|
+
# Convert results to standardized format
|
112
|
+
text_regions = self._standardize_results(raw_results, effective_confidence, detect_only)
|
113
|
+
|
114
|
+
# Convert TextRegion objects to dictionaries for backward compatibility
|
115
|
+
region_dicts = [region.to_dict() for region in text_regions]
|
116
|
+
results.append(region_dicts)
|
117
|
+
|
118
|
+
# Return results in the appropriate format
|
119
|
+
return results[0] if single_image else results
|
120
|
+
|
121
|
+
def _ensure_initialized(self, languages: List[str], device: str, options: Optional[BaseOCROptions]):
|
122
|
+
"""Ensure the model is initialized with the correct parameters."""
|
123
|
+
if not self._initialized:
|
124
|
+
self._initialize_model(languages, device, options)
|
125
|
+
self._initialized = True
|
126
|
+
|
127
|
+
@abstractmethod
|
128
|
+
def _initialize_model(self, languages: List[str], device: str, options: Optional[BaseOCROptions]):
|
129
|
+
"""Initialize the OCR model with the given parameters."""
|
130
|
+
raise NotImplementedError("Subclasses must implement this method")
|
131
|
+
|
132
|
+
@abstractmethod
|
133
|
+
def _preprocess_image(self, image: Image.Image) -> Any:
|
134
|
+
"""Convert PIL Image to engine-specific format."""
|
135
|
+
raise NotImplementedError("Subclasses must implement this method")
|
136
|
+
|
137
|
+
@abstractmethod
|
138
|
+
def _process_single_image(self, image: Any, detect_only: bool, options: Optional[BaseOCROptions]) -> Any:
|
139
|
+
"""Process a single image with the initialized model."""
|
140
|
+
raise NotImplementedError("Subclasses must implement this method")
|
141
|
+
|
142
|
+
@abstractmethod
|
143
|
+
def _standardize_results(self, raw_results: Any, min_confidence: float, detect_only: bool) -> List[TextRegion]:
|
144
|
+
"""Convert engine-specific results to standardized TextRegion objects."""
|
43
145
|
raise NotImplementedError("Subclasses must implement this method")
|
44
146
|
|
45
147
|
@abstractmethod
|
@@ -63,48 +165,41 @@ class OCREngine(ABC):
|
|
63
165
|
Returns:
|
64
166
|
A string cache key.
|
65
167
|
"""
|
66
|
-
|
67
|
-
|
68
|
-
device_key = str(options.device).lower()
|
168
|
+
lang_key = "-".join(sorted(getattr(options, "languages", self.DEFAULT_LANGUAGES)))
|
169
|
+
device_key = str(getattr(options, "device", self.DEFAULT_DEVICE)).lower()
|
69
170
|
return f"{self.__class__.__name__}_{lang_key}_{device_key}"
|
70
171
|
|
71
|
-
def _standardize_bbox(self, bbox: Any) ->
|
72
|
-
"""
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
bbox
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
Returns:
|
82
|
-
Tuple[float, float, float, float] or None if conversion fails.
|
83
|
-
"""
|
84
|
-
try:
|
85
|
-
if (
|
86
|
-
isinstance(bbox, (list, tuple))
|
87
|
-
and len(bbox) == 4
|
88
|
-
and all(isinstance(n, (int, float)) for n in bbox)
|
89
|
-
):
|
90
|
-
# Already in (x0, y0, x1, y1) format (or similar)
|
172
|
+
def _standardize_bbox(self, bbox: Any) -> Tuple[float, float, float, float]:
|
173
|
+
"""Standardizes bounding boxes to (x0, y0, x1, y1) format. Raises ValueError if standardization fails."""
|
174
|
+
# Check if it's already in the correct tuple/list format
|
175
|
+
if (
|
176
|
+
isinstance(bbox, (list, tuple))
|
177
|
+
and len(bbox) == 4
|
178
|
+
and all(isinstance(n, (int, float)) for n in bbox)
|
179
|
+
):
|
180
|
+
try:
|
91
181
|
return tuple(float(c) for c in bbox[:4])
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
182
|
+
except (ValueError, TypeError) as e:
|
183
|
+
raise ValueError(f"Invalid number format in bbox: {bbox}") from e
|
184
|
+
|
185
|
+
# Check if it's in polygon format [[x1,y1],[x2,y2],...]
|
186
|
+
elif (
|
187
|
+
isinstance(bbox, (list, tuple))
|
188
|
+
and len(bbox) > 0
|
189
|
+
and isinstance(bbox[0], (list, tuple))
|
190
|
+
and len(bbox[0]) == 2 # Ensure points are pairs
|
191
|
+
):
|
192
|
+
try:
|
98
193
|
x_coords = [float(point[0]) for point in bbox]
|
99
194
|
y_coords = [float(point[1]) for point in bbox]
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
195
|
+
if not x_coords or not y_coords: # Handle empty polygon case
|
196
|
+
raise ValueError("Empty polygon provided")
|
197
|
+
return (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
|
198
|
+
except (ValueError, TypeError, IndexError) as e:
|
199
|
+
raise ValueError(f"Invalid polygon format or values: {bbox}") from e
|
200
|
+
|
201
|
+
# If it's neither format, raise an error
|
202
|
+
raise ValueError(f"Could not standardize bounding box from unexpected format: {bbox}")
|
108
203
|
|
109
204
|
def __del__(self):
|
110
205
|
"""Cleanup resources when the engine is deleted."""
|
natural_pdf/ocr/engine_easyocr.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
# ocr_engine_easyocr.py
|
2
2
|
import importlib.util
|
3
|
-
import inspect # Used for dynamic parameter passing
|
4
3
|
import logging
|
5
4
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
6
5
|
|
7
6
|
import numpy as np
|
8
7
|
from PIL import Image
|
9
8
|
|
10
|
-
from .engine import OCREngine
|
9
|
+
from .engine import OCREngine, TextRegion
|
11
10
|
from .ocr_options import BaseOCROptions, EasyOCROptions
|
12
11
|
|
13
12
|
logger = logging.getLogger(__name__)
|
@@ -18,178 +17,159 @@ class EasyOCREngine(OCREngine):
|
|
18
17
|
|
19
18
|
def __init__(self):
|
20
19
|
super().__init__()
|
21
|
-
|
22
|
-
|
23
|
-
def _lazy_import_easyocr(self):
|
24
|
-
"""Imports easyocr only when needed."""
|
25
|
-
if self._easyocr is None:
|
26
|
-
if not self.is_available():
|
27
|
-
raise ImportError("EasyOCR is not installed or available.")
|
28
|
-
try:
|
29
|
-
import easyocr
|
30
|
-
|
31
|
-
self._easyocr = easyocr
|
32
|
-
logger.info("EasyOCR module imported successfully.")
|
33
|
-
except ImportError as e:
|
34
|
-
logger.error(f"Failed to import EasyOCR: {e}")
|
35
|
-
raise
|
36
|
-
return self._easyocr
|
20
|
+
# No longer need _easyocr attribute
|
21
|
+
# self._easyocr = None
|
37
22
|
|
38
23
|
def is_available(self) -> bool:
|
39
24
|
"""Check if EasyOCR is installed."""
|
40
25
|
return importlib.util.find_spec("easyocr") is not None
|
41
26
|
|
42
|
-
def
|
43
|
-
"""
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
if
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
"
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
27
|
+
def _initialize_model(self, languages: List[str], device: str, options: Optional[BaseOCROptions]):
|
28
|
+
"""Initialize the EasyOCR model."""
|
29
|
+
# Import directly here
|
30
|
+
try:
|
31
|
+
import easyocr
|
32
|
+
self.logger.info("EasyOCR module imported successfully.")
|
33
|
+
except ImportError as e:
|
34
|
+
self.logger.error(f"Failed to import EasyOCR: {e}")
|
35
|
+
raise
|
36
|
+
|
37
|
+
# Cast to EasyOCROptions if possible, otherwise use default
|
38
|
+
easy_options = options if isinstance(options, EasyOCROptions) else EasyOCROptions()
|
39
|
+
|
40
|
+
# Prepare constructor arguments
|
41
|
+
use_gpu = "cuda" in device.lower() or "mps" in device.lower()
|
42
|
+
|
43
|
+
constructor_args = {
|
44
|
+
"lang_list": languages,
|
45
|
+
"gpu": use_gpu,
|
46
|
+
# Explicitly map relevant options
|
47
|
+
"model_storage_directory": easy_options.model_storage_directory,
|
48
|
+
"user_network_directory": easy_options.user_network_directory,
|
49
|
+
"recog_network": easy_options.recog_network,
|
50
|
+
"detect_network": easy_options.detect_network,
|
51
|
+
"download_enabled": easy_options.download_enabled,
|
52
|
+
"detector": easy_options.detector,
|
53
|
+
"recognizer": easy_options.recognizer,
|
54
|
+
"verbose": easy_options.verbose,
|
55
|
+
"quantize": easy_options.quantize,
|
56
|
+
"cudnn_benchmark": easy_options.cudnn_benchmark,
|
57
|
+
}
|
58
|
+
|
59
|
+
# Filter out None values, as EasyOCR expects non-None or default behaviour
|
60
|
+
constructor_args = {k: v for k, v in constructor_args.items() if v is not None}
|
61
|
+
|
62
|
+
self.logger.debug(f"EasyOCR Reader constructor args: {constructor_args}")
|
63
|
+
|
64
|
+
# Create the reader
|
76
65
|
try:
|
77
|
-
|
78
|
-
self.
|
79
|
-
logger.info("EasyOCR reader created successfully.")
|
80
|
-
return reader
|
66
|
+
self._model = easyocr.Reader(**constructor_args)
|
67
|
+
self.logger.info("EasyOCR reader created successfully")
|
81
68
|
except Exception as e:
|
82
|
-
logger.error(f"Failed to create EasyOCR reader: {e}"
|
69
|
+
self.logger.error(f"Failed to create EasyOCR reader: {e}")
|
83
70
|
raise
|
84
71
|
|
85
|
-
def
|
86
|
-
"""
|
87
|
-
|
72
|
+
def _preprocess_image(self, image: Image.Image) -> np.ndarray:
|
73
|
+
"""Convert PIL Image to numpy array for EasyOCR."""
|
74
|
+
return np.array(image)
|
75
|
+
|
76
|
+
def _process_single_image(self, image: np.ndarray, detect_only: bool, options: Optional[EasyOCROptions]) -> Any:
|
77
|
+
"""Process a single image with EasyOCR."""
|
78
|
+
if self._model is None:
|
79
|
+
raise RuntimeError("EasyOCR model not initialized")
|
80
|
+
|
81
|
+
# Cast options to proper type if provided
|
82
|
+
easy_options = options if isinstance(options, EasyOCROptions) else EasyOCROptions()
|
83
|
+
|
84
|
+
# Prepare readtext arguments (only needed if not detect_only)
|
88
85
|
readtext_args = {}
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
86
|
+
if not detect_only:
|
87
|
+
for param in [
|
88
|
+
"detail", "paragraph", "min_size", "contrast_ths", "adjust_contrast",
|
89
|
+
"filter_ths", "text_threshold", "low_text", "link_threshold",
|
90
|
+
"canvas_size", "mag_ratio", "slope_ths", "ycenter_ths", "height_ths",
|
91
|
+
"width_ths", "y_ths", "x_ths", "add_margin", "output_format"
|
92
|
+
]:
|
93
|
+
if hasattr(easy_options, param):
|
94
|
+
val = getattr(easy_options, param)
|
95
|
+
if val is not None:
|
96
|
+
readtext_args[param] = val
|
97
|
+
|
98
|
+
# Process differently based on detect_only flag
|
99
|
+
if detect_only:
|
100
|
+
# Returns tuple (horizontal_list, free_list)
|
101
|
+
# horizontal_list is a list containing one item: the list of boxes
|
102
|
+
# Each box is [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
|
103
|
+
bboxes_tuple = self._model.detect(image, **readtext_args) # Pass args here too? Check EasyOCR docs if needed.
|
104
|
+
if bboxes_tuple and isinstance(bboxes_tuple, tuple) and len(bboxes_tuple) > 0 and isinstance(bboxes_tuple[0], list):
|
105
|
+
return bboxes_tuple[0] # Return the list of polygons directly
|
106
|
+
else:
|
107
|
+
self.logger.warning(f"EasyOCR detect returned unexpected format: {bboxes_tuple}")
|
108
|
+
return [] # Return empty list on unexpected format
|
109
|
+
else:
|
110
|
+
return self._model.readtext(image, **readtext_args)
|
111
|
+
|
112
|
+
def _standardize_results(self, raw_results: Any, min_confidence: float, detect_only: bool) -> List[TextRegion]:
|
113
|
+
"""Convert EasyOCR results to standardized TextRegion objects."""
|
114
|
+
standardized_regions = []
|
115
|
+
|
116
|
+
if detect_only:
|
117
|
+
# In detect_only mode, raw_results is already a list of bounding boxes
|
118
|
+
# Each bbox is in [x_min, x_max, y_min, y_max] format
|
119
|
+
if isinstance(raw_results, list):
|
120
|
+
for detection in raw_results:
|
121
|
+
try:
|
122
|
+
if isinstance(detection, (list, tuple)) and len(detection) == 4:
|
123
|
+
x_min, x_max, y_min, y_max = detection
|
124
|
+
# Convert to standardized (x0, y0, x1, y1) format
|
125
|
+
try:
|
126
|
+
bbox = (float(x_min), float(y_min), float(x_max), float(y_max))
|
127
|
+
standardized_regions.append(TextRegion(bbox, text=None, confidence=None))
|
128
|
+
except (ValueError, TypeError) as e:
|
129
|
+
raise ValueError(f"Invalid number format in EasyOCR detect bbox: {detection}") from e
|
130
|
+
else:
|
131
|
+
raise ValueError(f"Invalid detection format from EasyOCR: {detection}")
|
132
|
+
except ValueError as e:
|
133
|
+
# Re-raise any value errors from standardization or format checks
|
134
|
+
raise e
|
135
|
+
except Exception as e:
|
136
|
+
# Catch other potential processing errors
|
137
|
+
raise ValueError(f"Error processing EasyOCR detection item: {detection}") from e
|
138
|
+
else:
|
139
|
+
raise ValueError(f"Expected list of bounding boxes in detect_only mode, got: {raw_results}")
|
140
|
+
|
141
|
+
return standardized_regions
|
142
|
+
|
143
|
+
# Full OCR mode (readtext results)
|
106
144
|
for detection in raw_results:
|
107
145
|
try:
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
and len(detection) >= 3
|
112
|
-
):
|
113
|
-
bbox_raw = detection[0]
|
146
|
+
# Detail mode (list/tuple result)
|
147
|
+
if isinstance(detection, (list, tuple)) and len(detection) >= 3:
|
148
|
+
bbox_raw = detection[0] # This is usually a polygon [[x1,y1],...]
|
114
149
|
text = str(detection[1])
|
115
150
|
confidence = float(detection[2])
|
116
|
-
|
151
|
+
|
117
152
|
if confidence >= min_confidence:
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
logger.warning(f"Skipping invalid detection format: {detection}. Error: {e}")
|
137
|
-
continue
|
138
|
-
return standardized_results
|
139
|
-
|
140
|
-
def process_image(
|
141
|
-
self, images: Union[Image.Image, List[Image.Image]], options: BaseOCROptions
|
142
|
-
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
|
143
|
-
"""Processes a single image or a batch of images with EasyOCR."""
|
144
|
-
|
145
|
-
if not isinstance(options, EasyOCROptions):
|
146
|
-
logger.warning("Received BaseOCROptions, expected EasyOCROptions. Using defaults.")
|
147
|
-
# Create default EasyOCR options if base was passed, preserving base settings
|
148
|
-
options = EasyOCROptions(
|
149
|
-
languages=options.languages,
|
150
|
-
min_confidence=options.min_confidence,
|
151
|
-
device=options.device,
|
152
|
-
extra_args=options.extra_args, # Pass along any extra args
|
153
|
-
)
|
154
|
-
|
155
|
-
reader = self._get_reader(options)
|
156
|
-
readtext_args = self._prepare_readtext_args(options, reader)
|
157
|
-
|
158
|
-
# --- Handle single image or batch ---
|
159
|
-
if isinstance(images, list):
|
160
|
-
# --- Batch Processing (Iterative for EasyOCR) ---
|
161
|
-
all_results = []
|
162
|
-
logger.info(f"Processing batch of {len(images)} images with EasyOCR (iteratively)...")
|
163
|
-
for i, img in enumerate(images):
|
164
|
-
if not isinstance(img, Image.Image):
|
165
|
-
logger.warning(f"Item at index {i} in batch is not a PIL Image. Skipping.")
|
166
|
-
all_results.append([])
|
167
|
-
continue
|
168
|
-
img_array = np.array(img)
|
169
|
-
try:
|
170
|
-
logger.debug(f"Processing image {i+1}/{len(images)} in batch.")
|
171
|
-
raw_results = reader.readtext(img_array, **readtext_args)
|
172
|
-
standardized = self._standardize_results(raw_results, options)
|
173
|
-
all_results.append(standardized)
|
174
|
-
except Exception as e:
|
175
|
-
logger.error(
|
176
|
-
f"Error processing image {i+1} in EasyOCR batch: {e}", exc_info=True
|
177
|
-
)
|
178
|
-
all_results.append([]) # Append empty list for failed image
|
179
|
-
logger.info(f"Finished processing batch with EasyOCR.")
|
180
|
-
return all_results # Return List[List[Dict]]
|
181
|
-
|
182
|
-
elif isinstance(images, Image.Image):
|
183
|
-
# --- Single Image Processing ---
|
184
|
-
logger.info("Processing single image with EasyOCR...")
|
185
|
-
img_array = np.array(images)
|
186
|
-
try:
|
187
|
-
raw_results = reader.readtext(img_array, **readtext_args)
|
188
|
-
standardized = self._standardize_results(raw_results, options)
|
189
|
-
logger.info(f"Finished processing single image. Found {len(standardized)} results.")
|
190
|
-
return standardized # Return List[Dict]
|
153
|
+
try:
|
154
|
+
# Use the standard helper for polygons
|
155
|
+
bbox = self._standardize_bbox(bbox_raw)
|
156
|
+
standardized_regions.append(TextRegion(bbox, text, confidence))
|
157
|
+
except ValueError as e:
|
158
|
+
raise ValueError(f"Could not standardize bounding box from EasyOCR readtext: {bbox_raw}") from e
|
159
|
+
|
160
|
+
# Simple mode (string result)
|
161
|
+
elif isinstance(detection, str):
|
162
|
+
if 0.0 >= min_confidence: # Always include if min_confidence is 0
|
163
|
+
standardized_regions.append(TextRegion((0, 0, 0, 0), detection, 1.0))
|
164
|
+
else:
|
165
|
+
# Handle unexpected format in OCR mode
|
166
|
+
raise ValueError(f"Invalid OCR detection format from EasyOCR readtext: {detection}")
|
167
|
+
|
168
|
+
except ValueError as e:
|
169
|
+
# Re-raise any value errors from standardization or format checks
|
170
|
+
raise e
|
191
171
|
except Exception as e:
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
172
|
+
# Catch other potential processing errors
|
173
|
+
raise ValueError(f"Error processing EasyOCR detection item: {detection}") from e
|
174
|
+
|
175
|
+
return standardized_regions
|