natural-pdf: 0.1.14 (py3-none-any.whl) → 0.1.16 (py3-none-any.whl)
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +31 -0
- natural_pdf/analyzers/layout/gemini.py +137 -162
- natural_pdf/analyzers/layout/layout_manager.py +9 -5
- natural_pdf/analyzers/layout/layout_options.py +77 -7
- natural_pdf/analyzers/layout/paddle.py +318 -165
- natural_pdf/analyzers/layout/table_structure_utils.py +78 -0
- natural_pdf/analyzers/shape_detection_mixin.py +770 -405
- natural_pdf/classification/mixin.py +2 -8
- natural_pdf/collections/pdf_collection.py +25 -30
- natural_pdf/core/highlighting_service.py +47 -32
- natural_pdf/core/page.py +226 -70
- natural_pdf/core/pdf.py +19 -22
- natural_pdf/elements/base.py +9 -9
- natural_pdf/elements/collections.py +105 -50
- natural_pdf/elements/region.py +320 -113
- natural_pdf/exporters/paddleocr.py +38 -13
- natural_pdf/flows/__init__.py +3 -3
- natural_pdf/flows/collections.py +303 -132
- natural_pdf/flows/element.py +277 -132
- natural_pdf/flows/flow.py +33 -16
- natural_pdf/flows/region.py +142 -79
- natural_pdf/ocr/engine_doctr.py +37 -4
- natural_pdf/ocr/engine_easyocr.py +23 -3
- natural_pdf/ocr/engine_paddle.py +281 -30
- natural_pdf/ocr/engine_surya.py +8 -3
- natural_pdf/ocr/ocr_manager.py +75 -76
- natural_pdf/ocr/ocr_options.py +52 -87
- natural_pdf/search/__init__.py +25 -12
- natural_pdf/search/lancedb_search_service.py +91 -54
- natural_pdf/search/numpy_search_service.py +86 -65
- natural_pdf/search/searchable_mixin.py +2 -2
- natural_pdf/selectors/parser.py +125 -81
- natural_pdf/widgets/__init__.py +1 -1
- natural_pdf/widgets/viewer.py +205 -449
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/METADATA +27 -45
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/RECORD +39 -38
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/top_level.txt +0 -0
natural_pdf/ocr/engine_paddle.py
CHANGED
@@ -11,10 +11,99 @@ from .ocr_options import BaseOCROptions, PaddleOCROptions
|
|
11
11
|
|
12
12
|
logger = logging.getLogger(__name__)
|
13
13
|
|
14
|
-
|
15
14
|
class PaddleOCREngine(OCREngine):
|
16
15
|
"""PaddleOCR engine implementation."""
|
17
16
|
|
17
|
+
SUPPORT_MATRIX = {
|
18
|
+
"PP-OCRv5": {"ch", "chinese_cht", "en", "japan"},
|
19
|
+
"PP-OCRv4": {"ch", "en"},
|
20
|
+
"PP-OCRv3": {
|
21
|
+
"abq",
|
22
|
+
"af",
|
23
|
+
"ady",
|
24
|
+
"ang",
|
25
|
+
"ar",
|
26
|
+
"ava",
|
27
|
+
"az",
|
28
|
+
"be",
|
29
|
+
"bg",
|
30
|
+
"bgc",
|
31
|
+
"bh",
|
32
|
+
"bho",
|
33
|
+
"bs",
|
34
|
+
"ch",
|
35
|
+
"che",
|
36
|
+
"chinese_cht",
|
37
|
+
"cs",
|
38
|
+
"cy",
|
39
|
+
"da",
|
40
|
+
"dar",
|
41
|
+
"de",
|
42
|
+
"german",
|
43
|
+
"en",
|
44
|
+
"es",
|
45
|
+
"et",
|
46
|
+
"fa",
|
47
|
+
"fr",
|
48
|
+
"french",
|
49
|
+
"ga",
|
50
|
+
"gom",
|
51
|
+
"hi",
|
52
|
+
"hr",
|
53
|
+
"hu",
|
54
|
+
"id",
|
55
|
+
"inh",
|
56
|
+
"is",
|
57
|
+
"it",
|
58
|
+
"japan",
|
59
|
+
"ka",
|
60
|
+
"kbd",
|
61
|
+
"korean",
|
62
|
+
"ku",
|
63
|
+
"la",
|
64
|
+
"lbe",
|
65
|
+
"lez",
|
66
|
+
"lt",
|
67
|
+
"lv",
|
68
|
+
"mah",
|
69
|
+
"mai",
|
70
|
+
"mi",
|
71
|
+
"mn",
|
72
|
+
"mr",
|
73
|
+
"ms",
|
74
|
+
"mt",
|
75
|
+
"ne",
|
76
|
+
"new",
|
77
|
+
"nl",
|
78
|
+
"no",
|
79
|
+
"oc",
|
80
|
+
"pi",
|
81
|
+
"pl",
|
82
|
+
"pt",
|
83
|
+
"ro",
|
84
|
+
"rs_cyrillic",
|
85
|
+
"rs_latin",
|
86
|
+
"ru",
|
87
|
+
"sa",
|
88
|
+
"sck",
|
89
|
+
"sk",
|
90
|
+
"sl",
|
91
|
+
"sq",
|
92
|
+
"sv",
|
93
|
+
"sw",
|
94
|
+
"ta",
|
95
|
+
"tab",
|
96
|
+
"te",
|
97
|
+
"tl",
|
98
|
+
"tr",
|
99
|
+
"ug",
|
100
|
+
"uk",
|
101
|
+
"ur",
|
102
|
+
"uz",
|
103
|
+
"vi",
|
104
|
+
},
|
105
|
+
}
|
106
|
+
|
18
107
|
def __init__(self):
|
19
108
|
super().__init__()
|
20
109
|
|
@@ -30,43 +119,160 @@ class PaddleOCREngine(OCREngine):
|
|
30
119
|
def _initialize_model(
|
31
120
|
self, languages: List[str], device: str, options: Optional[BaseOCROptions]
|
32
121
|
):
|
33
|
-
"""Initialize the PaddleOCR model."""
|
122
|
+
"""Initialize the PaddleOCR model using the >=3.0.0 pipeline API."""
|
34
123
|
try:
|
35
124
|
import paddleocr
|
36
125
|
|
37
126
|
self.logger.info("PaddleOCR module imported successfully.")
|
38
127
|
except ImportError as e:
|
39
128
|
self.logger.error(f"Failed to import PaddleOCR/PaddlePaddle: {e}")
|
40
|
-
raise
|
129
|
+
raise RuntimeError(
|
130
|
+
"paddleocr is not available. Please install it and paddlepaddle with: pip install -U paddlepaddle paddleocr"
|
131
|
+
) from e
|
41
132
|
|
42
|
-
# Cast to PaddleOCROptions if possible
|
43
133
|
paddle_options = options if isinstance(options, PaddleOCROptions) else PaddleOCROptions()
|
44
134
|
|
45
|
-
|
135
|
+
if len(languages) > 1:
|
136
|
+
self.logger.warning(
|
137
|
+
"PaddleOCR >= 3.0.0 only supports one language at a time. "
|
138
|
+
"Using the first language provided: '%s'",
|
139
|
+
languages[0],
|
140
|
+
)
|
46
141
|
primary_lang = languages[0] if languages else "en"
|
47
|
-
use_gpu = "cuda" in str(device).lower()
|
48
142
|
|
49
|
-
#
|
50
|
-
|
143
|
+
# Determine the appropriate ocr_version based on language support
|
144
|
+
user_ocr_version = paddle_options.ocr_version
|
145
|
+
final_ocr_version = user_ocr_version
|
146
|
+
version_preference = ["PP-OCRv5", "PP-OCRv4", "PP-OCRv3"]
|
147
|
+
|
148
|
+
# --- RESTORE: Language/version support check logic ---
|
149
|
+
user_specified_model = (
|
150
|
+
getattr(paddle_options, "text_recognition_model_name", None) is not None or
|
151
|
+
getattr(paddle_options, "text_detection_model_name", None) is not None
|
152
|
+
)
|
153
|
+
if user_specified_model and user_ocr_version:
|
154
|
+
if primary_lang not in self.SUPPORT_MATRIX.get(user_ocr_version, set()):
|
155
|
+
self.logger.warning(
|
156
|
+
f"Model '{user_ocr_version}' was explicitly specified, but language '{primary_lang}' is not officially supported. Proceeding anyway as requested."
|
157
|
+
)
|
158
|
+
|
159
|
+
if user_ocr_version:
|
160
|
+
if primary_lang not in self.SUPPORT_MATRIX.get(user_ocr_version, set()):
|
161
|
+
self.logger.warning(
|
162
|
+
f"Language '{primary_lang}' is not supported by the requested ocr_version '{user_ocr_version}'. "
|
163
|
+
f"Attempting to find a compatible version."
|
164
|
+
)
|
165
|
+
self.logger.warning(
|
166
|
+
"Language '%s' is not supported by the requested ocr_version '%s'. "
|
167
|
+
"Attempting to find a compatible version.",
|
168
|
+
primary_lang,
|
169
|
+
user_ocr_version,
|
170
|
+
)
|
171
|
+
final_ocr_version = None # Reset to find a compatible version
|
172
|
+
|
173
|
+
# If no version was specified or the specified one was incompatible, find the best fit.
|
174
|
+
if not final_ocr_version:
|
175
|
+
found_compatible = False
|
176
|
+
for version in version_preference:
|
177
|
+
if primary_lang in self.SUPPORT_MATRIX[version]:
|
178
|
+
final_ocr_version = version
|
179
|
+
found_compatible = True
|
180
|
+
break
|
181
|
+
|
182
|
+
if not found_compatible:
|
183
|
+
if not languages or not primary_lang:
|
184
|
+
final_ocr_version = "PP-OCRv5"
|
185
|
+
self.logger.info(
|
186
|
+
"No language specified and no match found. Defaulting to ocr_version 'PP-OCRv5'. Note: 'PP-OCRv3' has the widest language support among PaddleOCR versions."
|
187
|
+
)
|
188
|
+
else:
|
189
|
+
self.logger.error(
|
190
|
+
"Language '%s' is not supported by any available PaddleOCR version (v3, v4, v5). "
|
191
|
+
"Proceeding without a specific version, but this is likely to fail.",
|
192
|
+
primary_lang,
|
193
|
+
)
|
194
|
+
final_ocr_version = None # Let paddleocr handle the error
|
195
|
+
elif final_ocr_version != "PP-OCRv5":
|
196
|
+
self.logger.warning(
|
197
|
+
f"Automatically selected ocr_version '{final_ocr_version}' for language '{primary_lang}'. This is not the default (PP-OCRv5)."
|
198
|
+
)
|
199
|
+
self.logger.warning(
|
200
|
+
"Automatically selected ocr_version '%s' for language '%s'. This is not the default (PP-OCRv5).",
|
201
|
+
final_ocr_version,
|
202
|
+
primary_lang,
|
203
|
+
)
|
204
|
+
# else: if PP-OCRv5, no need to log
|
205
|
+
elif final_ocr_version != "PP-OCRv5":
|
206
|
+
self.logger.warning(
|
207
|
+
f"Using user-specified ocr_version '{final_ocr_version}' for language '{primary_lang}'. This is not the default (PP-OCRv5)."
|
208
|
+
)
|
209
|
+
self.logger.warning(
|
210
|
+
"Using user-specified ocr_version '%s' for language '%s'. This is not the default (PP-OCRv5).",
|
211
|
+
final_ocr_version,
|
212
|
+
primary_lang,
|
213
|
+
)
|
214
|
+
# --- END RESTORE ---
|
215
|
+
|
216
|
+
# Build PaddleOCR config dict from valid constructor arguments.
|
217
|
+
# See: https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/pipeline_usage/OCR.html
|
218
|
+
valid_init_args = {
|
219
|
+
"doc_orientation_classify_model_name",
|
220
|
+
"doc_orientation_classify_model_dir",
|
221
|
+
"doc_unwarping_model_name",
|
222
|
+
"doc_unwarping_model_dir",
|
223
|
+
"text_detection_model_name",
|
224
|
+
"text_detection_model_dir",
|
225
|
+
"textline_orientation_model_name",
|
226
|
+
"textline_orientation_model_dir",
|
227
|
+
"text_recognition_model_name",
|
228
|
+
"text_recognition_model_dir",
|
229
|
+
"textline_orientation_batch_size",
|
230
|
+
"text_recognition_batch_size",
|
231
|
+
"use_doc_orientation_classify",
|
232
|
+
"use_doc_unwarping",
|
233
|
+
"use_textline_orientation",
|
234
|
+
"text_det_limit_side_len",
|
235
|
+
"text_det_limit_type",
|
236
|
+
"text_det_thresh",
|
237
|
+
"text_det_box_thresh",
|
238
|
+
"text_det_unclip_ratio",
|
239
|
+
"text_det_input_shape",
|
240
|
+
"text_rec_score_thresh",
|
241
|
+
"text_rec_input_shape",
|
242
|
+
"lang",
|
243
|
+
"ocr_version",
|
244
|
+
"device",
|
245
|
+
"enable_hpi",
|
246
|
+
"use_tensorrt",
|
247
|
+
"precision",
|
248
|
+
"enable_mkldnn",
|
249
|
+
# "mkldnn_cache_capacity",
|
250
|
+
"cpu_threads",
|
251
|
+
"paddlex_config",
|
252
|
+
}
|
253
|
+
|
254
|
+
# Start with defaults passed from the main apply_ocr call.
|
255
|
+
ocr_config = {
|
51
256
|
"lang": primary_lang,
|
52
|
-
"
|
53
|
-
"use_angle_cls": paddle_options.use_angle_cls,
|
54
|
-
"det": True,
|
55
|
-
"rec": True, # We'll control recognition at process time
|
257
|
+
"device": device,
|
56
258
|
}
|
57
259
|
|
58
|
-
# Add
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
260
|
+
# Add the determined ocr_version to the config if available
|
261
|
+
if final_ocr_version:
|
262
|
+
ocr_config["ocr_version"] = final_ocr_version
|
263
|
+
|
264
|
+
# Populate ocr_config from paddle_options with non-None values
|
265
|
+
# that are valid for the constructor. This allows overriding defaults.
|
266
|
+
for arg in valid_init_args:
|
267
|
+
if hasattr(paddle_options, arg):
|
268
|
+
value = getattr(paddle_options, arg)
|
269
|
+
if value is not None:
|
270
|
+
ocr_config[arg] = value
|
64
271
|
|
65
|
-
self.logger.debug(f"PaddleOCR constructor args: {constructor_args}")
|
66
272
|
|
67
|
-
# Create the model
|
68
273
|
try:
|
69
|
-
|
274
|
+
# The new API uses PaddleOCR as a pipeline object.
|
275
|
+
self._model = paddleocr.PaddleOCR(**ocr_config)
|
70
276
|
self.logger.info("PaddleOCR model created successfully")
|
71
277
|
except Exception as e:
|
72
278
|
self.logger.error(f"Failed to create PaddleOCR model: {e}")
|
@@ -84,19 +290,35 @@ class PaddleOCREngine(OCREngine):
|
|
84
290
|
def _process_single_image(
|
85
291
|
self, image: np.ndarray, detect_only: bool, options: Optional[PaddleOCROptions]
|
86
292
|
) -> Any:
|
87
|
-
"""Process a single image with PaddleOCR."""
|
293
|
+
"""Process a single image with PaddleOCR using the .predict() method."""
|
88
294
|
if self._model is None:
|
89
295
|
raise RuntimeError("PaddleOCR model not initialized")
|
90
296
|
|
91
|
-
# Prepare
|
92
|
-
|
297
|
+
# Prepare arguments for the .predict() method from PaddleOCROptions.
|
298
|
+
# See: https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/pipeline_usage/OCR.html
|
299
|
+
predict_args = {}
|
93
300
|
if options and isinstance(options, PaddleOCROptions):
|
94
|
-
|
95
|
-
|
96
|
-
|
301
|
+
valid_predict_args = {
|
302
|
+
"use_doc_orientation_classify",
|
303
|
+
"use_doc_unwarping",
|
304
|
+
"use_textline_orientation",
|
305
|
+
"text_det_limit_side_len",
|
306
|
+
"text_det_limit_type",
|
307
|
+
"text_det_thresh",
|
308
|
+
"text_det_box_thresh",
|
309
|
+
"text_det_unclip_ratio",
|
310
|
+
"text_rec_score_thresh",
|
311
|
+
}
|
312
|
+
for arg in valid_predict_args:
|
313
|
+
if hasattr(options, arg) and getattr(options, arg) is not None:
|
314
|
+
predict_args[arg] = getattr(options, arg)
|
315
|
+
|
316
|
+
# The `detect_only` flag is handled in `_standardize_results` by ignoring
|
317
|
+
# the recognized text and confidence, as the new .predict() API does not
|
318
|
+
# have a direct flag to disable only the recognition step.
|
97
319
|
|
98
|
-
# Run OCR
|
99
|
-
raw_results = self._model.
|
320
|
+
# Run OCR using the new .predict() method.
|
321
|
+
raw_results = self._model.predict(image)
|
100
322
|
return raw_results
|
101
323
|
|
102
324
|
def _standardize_results(
|
@@ -108,8 +330,37 @@ class PaddleOCREngine(OCREngine):
|
|
108
330
|
if not raw_results or not isinstance(raw_results, list) or len(raw_results) == 0:
|
109
331
|
return standardized_regions
|
110
332
|
|
111
|
-
|
333
|
+
# New PaddleOCR 3.x format: list of dicts with keys like 'rec_texts', 'rec_scores', 'rec_boxes'
|
334
|
+
if isinstance(raw_results[0], dict):
|
335
|
+
for page in raw_results:
|
336
|
+
rec_texts = page.get("rec_texts", [])
|
337
|
+
rec_scores = page.get("rec_scores", [])
|
338
|
+
rec_boxes = page.get("rec_boxes", [])
|
339
|
+
# Fallback to dt_polys if rec_boxes is not present or empty
|
340
|
+
if rec_boxes is None or len(rec_boxes) == 0:
|
341
|
+
rec_boxes = page.get("dt_polys", [])
|
342
|
+
for i in range(len(rec_texts)):
|
343
|
+
text = str(rec_texts[i]) if not detect_only else None
|
344
|
+
confidence = float(rec_scores[i]) if not detect_only else None
|
345
|
+
# --- Bounding box format note ---
|
346
|
+
# PaddleOCR 3.x may return bounding boxes in several formats:
|
347
|
+
# - Rectangle: [x1, y1, x2, y2] (list or 1D numpy array of length 4)
|
348
|
+
# - Polygon: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] (list of 4 points or 2D numpy array shape (4,2))
|
349
|
+
# - Sometimes, rec_boxes is a numpy array of shape (N, 4) or (N, 4, 2)
|
350
|
+
# This code converts any numpy array to a list before passing to _standardize_bbox,
|
351
|
+
# which handles both rectangle and polygon formats robustly.
|
352
|
+
box = rec_boxes[i]
|
353
|
+
if hasattr(box, 'tolist'):
|
354
|
+
box = box.tolist()
|
355
|
+
bbox = self._standardize_bbox(box)
|
356
|
+
if detect_only:
|
357
|
+
standardized_regions.append(TextRegion(bbox, text=None, confidence=None))
|
358
|
+
elif confidence is not None and confidence >= min_confidence:
|
359
|
+
standardized_regions.append(TextRegion(bbox, text, confidence))
|
360
|
+
return standardized_regions
|
112
361
|
|
362
|
+
# Old format fallback (list of lists/tuples)
|
363
|
+
page_results = raw_results[0] if raw_results[0] is not None else []
|
113
364
|
for detection in page_results:
|
114
365
|
# Initialize text and confidence
|
115
366
|
text = None
|
natural_pdf/ocr/engine_surya.py
CHANGED
@@ -38,11 +38,17 @@ class SuryaOCREngine(OCREngine):
|
|
38
38
|
self.logger.info("Surya modules imported successfully.")
|
39
39
|
|
40
40
|
predictor_args = {} # Configure if needed
|
41
|
+
# Filter only allowed Surya args (currently none, but placeholder for future)
|
42
|
+
allowed_args = set() # Update if Surya supports constructor args
|
43
|
+
filtered_args = {k: v for k, v in predictor_args.items() if k in allowed_args}
|
44
|
+
dropped = set(predictor_args) - allowed_args
|
45
|
+
if dropped:
|
46
|
+
self.logger.warning(f"Dropped unsupported Surya args: {dropped}")
|
41
47
|
|
42
48
|
self.logger.info("Instantiating Surya DetectionPredictor...")
|
43
|
-
self._detection_predictor = self._surya_detection(**
|
49
|
+
self._detection_predictor = self._surya_detection(**filtered_args)
|
44
50
|
self.logger.info("Instantiating Surya RecognitionPredictor...")
|
45
|
-
self._recognition_predictor = self._surya_recognition(**
|
51
|
+
self._recognition_predictor = self._surya_recognition(**filtered_args)
|
46
52
|
|
47
53
|
self.logger.info("Surya predictors initialized.")
|
48
54
|
|
@@ -70,7 +76,6 @@ class SuryaOCREngine(OCREngine):
|
|
70
76
|
else:
|
71
77
|
results = self._recognition_predictor(
|
72
78
|
images=[image],
|
73
|
-
langs=langs, # Use the languages set during initialization
|
74
79
|
det_predictor=self._detection_predictor,
|
75
80
|
)
|
76
81
|
|
natural_pdf/ocr/ocr_manager.py
CHANGED
@@ -83,6 +83,15 @@ class OCRManager:
|
|
83
83
|
if not engine_instance.is_available():
|
84
84
|
# Check availability before storing
|
85
85
|
install_hint = f"pip install 'natural-pdf[{engine_name}]'"
|
86
|
+
if engine_name == "easyocr":
|
87
|
+
install_hint = "pip install easyocr"
|
88
|
+
elif engine_name == "paddle":
|
89
|
+
install_hint = "pip install paddleocr paddlepaddle"
|
90
|
+
elif engine_name == "surya":
|
91
|
+
install_hint = "pip install surya-ocr"
|
92
|
+
elif engine_name == "doctr":
|
93
|
+
install_hint = "pip install 'python-doctr[torch]'"
|
94
|
+
|
86
95
|
raise RuntimeError(
|
87
96
|
f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
|
88
97
|
)
|
@@ -184,93 +193,83 @@ class OCRManager:
|
|
184
193
|
)
|
185
194
|
|
186
195
|
# --- Get Engine Instance and Process ---
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
196
|
+
engine_instance = self._get_engine_instance(selected_engine_name)
|
197
|
+
processing_mode = "batch" if is_batch else "single image"
|
198
|
+
# Log thread name for clarity during parallel calls
|
199
|
+
thread_id = threading.current_thread().name
|
200
|
+
logger.info(
|
201
|
+
f"[{thread_id}] Processing {processing_mode} using shared engine instance '{selected_engine_name}'..."
|
202
|
+
)
|
203
|
+
logger.debug(
|
204
|
+
f" Engine Args: languages={languages}, min_confidence={min_confidence}, device={device}, options={final_options}"
|
205
|
+
)
|
206
|
+
|
207
|
+
# Log image dimensions before processing
|
208
|
+
if is_batch:
|
209
|
+
image_dims = [
|
210
|
+
f"{img.width}x{img.height}"
|
211
|
+
for img in images
|
212
|
+
if hasattr(img, "width") and hasattr(img, "height")
|
213
|
+
]
|
214
|
+
logger.debug(
|
215
|
+
f"[{thread_id}] Processing batch of {len(images)} images with dimensions: {image_dims}"
|
194
216
|
)
|
217
|
+
elif hasattr(images, "width") and hasattr(images, "height"):
|
195
218
|
logger.debug(
|
196
|
-
f"
|
219
|
+
f"[{thread_id}] Processing single image with dimensions: {images.width}x{images.height}"
|
197
220
|
)
|
221
|
+
else:
|
222
|
+
logger.warning(f"[{thread_id}] Could not determine dimensions of input image(s).")
|
198
223
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
)
|
209
|
-
|
210
|
-
|
211
|
-
f"[{thread_id}] Processing single image with dimensions: {images.width}x{images.height}"
|
212
|
-
)
|
213
|
-
else:
|
214
|
-
logger.warning(f"[{thread_id}] Could not determine dimensions of input image(s).")
|
224
|
+
# Acquire lock specifically for the inference call
|
225
|
+
inference_lock = self._get_engine_inference_lock(selected_engine_name)
|
226
|
+
logger.debug(
|
227
|
+
f"[{thread_id}] Attempting to acquire inference lock for {selected_engine_name}..."
|
228
|
+
)
|
229
|
+
inference_wait_start = time.monotonic()
|
230
|
+
with inference_lock:
|
231
|
+
inference_acquired_time = time.monotonic()
|
232
|
+
logger.debug(
|
233
|
+
f"[{thread_id}] Acquired inference lock for {selected_engine_name} (waited {inference_acquired_time - inference_wait_start:.2f}s). Calling process_image..."
|
234
|
+
)
|
235
|
+
inference_start_time = time.monotonic()
|
215
236
|
|
216
|
-
|
217
|
-
|
237
|
+
results = engine_instance.process_image(
|
238
|
+
images=images,
|
239
|
+
languages=languages,
|
240
|
+
min_confidence=min_confidence,
|
241
|
+
device=device,
|
242
|
+
detect_only=detect_only,
|
243
|
+
options=final_options,
|
244
|
+
)
|
245
|
+
inference_end_time = time.monotonic()
|
218
246
|
logger.debug(
|
219
|
-
f"[{thread_id}]
|
247
|
+
f"[{thread_id}] process_image call finished for {selected_engine_name} (Duration: {inference_end_time - inference_start_time:.2f}s). Releasing lock."
|
220
248
|
)
|
221
|
-
inference_wait_start = time.monotonic()
|
222
|
-
with inference_lock:
|
223
|
-
inference_acquired_time = time.monotonic()
|
224
|
-
logger.debug(
|
225
|
-
f"[{thread_id}] Acquired inference lock for {selected_engine_name} (waited {inference_acquired_time - inference_wait_start:.2f}s). Calling process_image..."
|
226
|
-
)
|
227
|
-
inference_start_time = time.monotonic()
|
228
249
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
250
|
+
# Log result summary based on mode
|
251
|
+
if is_batch:
|
252
|
+
# Ensure results is a list before trying to get lengths
|
253
|
+
if isinstance(results, list):
|
254
|
+
num_results_per_image = [
|
255
|
+
len(res_list) if isinstance(res_list, list) else -1 for res_list in results
|
256
|
+
] # Handle potential errors returning non-lists
|
257
|
+
logger.info(
|
258
|
+
f"Processing complete. Found results per image: {num_results_per_image}"
|
236
259
|
)
|
237
|
-
|
238
|
-
logger.
|
239
|
-
f"
|
260
|
+
else:
|
261
|
+
logger.error(
|
262
|
+
f"Processing complete but received unexpected result type for batch: {type(results)}"
|
240
263
|
)
|
241
|
-
|
242
|
-
#
|
243
|
-
if
|
244
|
-
|
245
|
-
if isinstance(results, list):
|
246
|
-
num_results_per_image = [
|
247
|
-
len(res_list) if isinstance(res_list, list) else -1 for res_list in results
|
248
|
-
] # Handle potential errors returning non-lists
|
249
|
-
logger.info(
|
250
|
-
f"Processing complete. Found results per image: {num_results_per_image}"
|
251
|
-
)
|
252
|
-
else:
|
253
|
-
logger.error(
|
254
|
-
f"Processing complete but received unexpected result type for batch: {type(results)}"
|
255
|
-
)
|
264
|
+
else:
|
265
|
+
# Ensure results is a list
|
266
|
+
if isinstance(results, list):
|
267
|
+
logger.info(f"Processing complete. Found {len(results)} results.")
|
256
268
|
else:
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
logger.error(
|
262
|
-
f"Processing complete but received unexpected result type for single image: {type(results)}"
|
263
|
-
)
|
264
|
-
return results # Return type matches input type due to engine logic
|
265
|
-
|
266
|
-
except (ImportError, RuntimeError, ValueError, TypeError) as e:
|
267
|
-
logger.error(
|
268
|
-
f"OCR processing failed for engine '{selected_engine_name}': {e}", exc_info=True
|
269
|
-
)
|
270
|
-
raise # Re-raise expected errors
|
271
|
-
except Exception as e:
|
272
|
-
logger.error(f"An unexpected error occurred during OCR processing: {e}", exc_info=True)
|
273
|
-
raise # Re-raise unexpected errors
|
269
|
+
logger.error(
|
270
|
+
f"Processing complete but received unexpected result type for single image: {type(results)}"
|
271
|
+
)
|
272
|
+
return results # Return type matches input type due to engine logic
|
274
273
|
|
275
274
|
def get_available_engines(self) -> List[str]:
|
276
275
|
"""Returns a list of registered engine names that are currently available."""
|