kreuzberg 3.11.4__py3-none-any.whl → 3.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. kreuzberg/__init__.py +14 -13
  2. kreuzberg/__main__.py +0 -2
  3. kreuzberg/_api/main.py +119 -9
  4. kreuzberg/_chunker.py +0 -15
  5. kreuzberg/_config.py +212 -292
  6. kreuzberg/_document_classification.py +20 -47
  7. kreuzberg/_entity_extraction.py +1 -122
  8. kreuzberg/_extractors/_base.py +4 -71
  9. kreuzberg/_extractors/_email.py +1 -15
  10. kreuzberg/_extractors/_html.py +9 -12
  11. kreuzberg/_extractors/_image.py +1 -25
  12. kreuzberg/_extractors/_pandoc.py +10 -147
  13. kreuzberg/_extractors/_pdf.py +38 -94
  14. kreuzberg/_extractors/_presentation.py +0 -99
  15. kreuzberg/_extractors/_spread_sheet.py +13 -55
  16. kreuzberg/_extractors/_structured.py +1 -4
  17. kreuzberg/_gmft.py +14 -199
  18. kreuzberg/_language_detection.py +1 -36
  19. kreuzberg/_mcp/__init__.py +0 -2
  20. kreuzberg/_mcp/server.py +3 -10
  21. kreuzberg/_mime_types.py +1 -19
  22. kreuzberg/_ocr/_base.py +4 -76
  23. kreuzberg/_ocr/_easyocr.py +124 -186
  24. kreuzberg/_ocr/_paddleocr.py +154 -224
  25. kreuzberg/_ocr/_table_extractor.py +184 -0
  26. kreuzberg/_ocr/_tesseract.py +797 -361
  27. kreuzberg/_playa.py +5 -31
  28. kreuzberg/_registry.py +0 -36
  29. kreuzberg/_types.py +588 -93
  30. kreuzberg/_utils/_cache.py +84 -138
  31. kreuzberg/_utils/_device.py +0 -74
  32. kreuzberg/_utils/_document_cache.py +0 -75
  33. kreuzberg/_utils/_errors.py +0 -50
  34. kreuzberg/_utils/_ocr_cache.py +136 -0
  35. kreuzberg/_utils/_pdf_lock.py +0 -16
  36. kreuzberg/_utils/_process_pool.py +17 -64
  37. kreuzberg/_utils/_quality.py +0 -60
  38. kreuzberg/_utils/_ref.py +32 -0
  39. kreuzberg/_utils/_serialization.py +0 -30
  40. kreuzberg/_utils/_string.py +9 -59
  41. kreuzberg/_utils/_sync.py +0 -77
  42. kreuzberg/_utils/_table.py +49 -101
  43. kreuzberg/_utils/_tmp.py +0 -9
  44. kreuzberg/cli.py +54 -74
  45. kreuzberg/extraction.py +39 -32
  46. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/METADATA +19 -15
  47. kreuzberg-3.13.1.dist-info/RECORD +57 -0
  48. kreuzberg-3.11.4.dist-info/RECORD +0 -54
  49. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/WHEEL +0 -0
  50. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/entry_points.txt +0 -0
  51. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,17 +2,25 @@ from __future__ import annotations
2
2
 
3
3
  import platform
4
4
  import warnings
5
- from dataclasses import dataclass
6
5
  from importlib.util import find_spec
7
- from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
6
+ from typing import TYPE_CHECKING, Any, ClassVar, Final
9
7
 
10
8
  from PIL import Image
11
9
 
12
10
  from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
13
11
  from kreuzberg._ocr._base import OCRBackend
14
- from kreuzberg._types import ExtractionResult, Metadata
15
- from kreuzberg._utils._device import DeviceInfo, DeviceType, validate_device_request
12
+ from kreuzberg._types import ExtractionResult, Metadata, PaddleOCRConfig
13
+ from kreuzberg._utils._device import DeviceInfo, validate_device_request
14
+ from kreuzberg._utils._ocr_cache import (
15
+ build_cache_kwargs,
16
+ cache_and_complete_async,
17
+ cache_and_complete_sync,
18
+ generate_image_hash,
19
+ get_file_info,
20
+ handle_cache_lookup_async,
21
+ handle_cache_lookup_sync,
22
+ mark_processing_complete,
23
+ )
16
24
  from kreuzberg._utils._string import normalize_spaces
17
25
  from kreuzberg._utils._sync import run_sync
18
26
  from kreuzberg.exceptions import MissingDependencyError, OCRError, ValidationError
@@ -20,154 +28,97 @@ from kreuzberg.exceptions import MissingDependencyError, OCRError, ValidationErr
20
28
  if TYPE_CHECKING:
21
29
  from pathlib import Path
22
30
 
23
-
24
31
  try: # pragma: no cover
25
32
  from typing import Unpack # type: ignore[attr-defined]
26
33
  except ImportError: # pragma: no cover
27
34
  from typing_extensions import Unpack
28
35
 
36
+ if TYPE_CHECKING:
37
+ import numpy as np
38
+ from paddleocr import PaddleOCR
29
39
 
30
- PADDLEOCR_SUPPORTED_LANGUAGE_CODES: Final[set[str]] = {"ch", "en", "french", "german", "japan", "korean"}
40
+ HAS_PADDLEOCR: bool
41
+ if not TYPE_CHECKING:
42
+ try:
43
+ import numpy as np
44
+ from paddleocr import PaddleOCR
31
45
 
46
+ HAS_PADDLEOCR = True
47
+ except ImportError:
48
+ HAS_PADDLEOCR = False
49
+ np: Any = None
50
+ PaddleOCR: Any = None
32
51
 
33
- @dataclass(unsafe_hash=True, frozen=True, slots=True)
34
- class PaddleOCRConfig:
35
- """Configuration options for PaddleOCR.
36
-
37
- This TypedDict provides type hints and documentation for all PaddleOCR parameters.
38
- """
39
-
40
- cls_image_shape: str = "3,48,192"
41
- """Image shape for classification algorithm in format 'channels,height,width'."""
42
- det_algorithm: Literal["DB", "EAST", "SAST", "PSE", "FCE", "PAN", "CT", "DB++", "Layout"] = "DB"
43
- """Detection algorithm."""
44
- det_db_box_thresh: float = 0.5
45
- """Score threshold for detected boxes. Boxes below this value are discarded."""
46
- det_db_thresh: float = 0.3
47
- """Binarization threshold for DB output map."""
48
- det_db_unclip_ratio: float = 2.0
49
- """Expansion ratio for detected text boxes."""
50
- det_east_cover_thresh: float = 0.1
51
- """Score threshold for EAST output boxes."""
52
- det_east_nms_thresh: float = 0.2
53
- """NMS threshold for EAST model output boxes."""
54
- det_east_score_thresh: float = 0.8
55
- """Binarization threshold for EAST output map."""
56
- det_max_side_len: int = 960
57
- """Maximum size of image long side. Images exceeding this will be proportionally resized."""
58
- det_model_dir: str | None = None
59
- """Directory for detection model. If None, uses default model location."""
60
- drop_score: float = 0.5
61
- """Filter recognition results by confidence score. Results below this are discarded."""
62
- enable_mkldnn: bool = False
63
- """Whether to enable MKL-DNN acceleration (Intel CPU only)."""
64
- gpu_mem: int = 8000
65
- """GPU memory size (in MB) to use for initialization."""
66
- language: str = "en"
67
- """Language to use for OCR."""
68
- max_text_length: int = 25
69
- """Maximum text length that the recognition algorithm can recognize."""
70
- rec: bool = True
71
- """Enable text recognition when using the ocr() function."""
72
- rec_algorithm: Literal[
73
- "CRNN",
74
- "SRN",
75
- "NRTR",
76
- "SAR",
77
- "SEED",
78
- "SVTR",
79
- "SVTR_LCNet",
80
- "ViTSTR",
81
- "ABINet",
82
- "VisionLAN",
83
- "SPIN",
84
- "RobustScanner",
85
- "RFL",
86
- ] = "CRNN"
87
- """Recognition algorithm."""
88
- rec_image_shape: str = "3,32,320"
89
- """Image shape for recognition algorithm in format 'channels,height,width'."""
90
- rec_model_dir: str | None = None
91
- """Directory for recognition model. If None, uses default model location."""
92
- table: bool = True
93
- """Whether to enable table recognition."""
94
- use_angle_cls: bool = True
95
- """Whether to use text orientation classification model."""
96
- use_gpu: bool = False
97
- """Whether to use GPU for inference. DEPRECATED: Use 'device' parameter instead."""
98
- device: DeviceType = "auto"
99
- """Device to use for inference. Options: 'cpu', 'cuda', 'auto'. Note: MPS not supported by PaddlePaddle."""
100
- gpu_memory_limit: float | None = None
101
- """Maximum GPU memory to use in GB. None for no limit."""
102
- fallback_to_cpu: bool = True
103
- """Whether to fallback to CPU if requested device is unavailable."""
104
- use_space_char: bool = True
105
- """Whether to recognize spaces."""
106
- use_zero_copy_run: bool = False
107
- """Whether to enable zero_copy_run for inference optimization."""
52
+
53
+ PADDLEOCR_SUPPORTED_LANGUAGE_CODES: Final[set[str]] = {"ch", "en", "french", "german", "japan", "korean"}
108
54
 
109
55
 
110
56
  class PaddleBackend(OCRBackend[PaddleOCRConfig]):
111
57
  _paddle_ocr: ClassVar[Any] = None
112
58
 
113
59
  async def process_image(self, image: Image.Image, **kwargs: Unpack[PaddleOCRConfig]) -> ExtractionResult:
114
- """Asynchronously process an image and extract its text and metadata using PaddleOCR.
60
+ use_cache = kwargs.pop("use_cache", True)
115
61
 
116
- Args:
117
- image: An instance of PIL.Image representing the input image.
118
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
62
+ cache_kwargs = None
63
+ if use_cache:
64
+ image_hash = generate_image_hash(image)
65
+ cache_kwargs = build_cache_kwargs("paddleocr", kwargs, image_hash=image_hash)
119
66
 
120
- Returns:
121
- ExtractionResult: The extraction result containing text content, mime type, and metadata.
67
+ cached_result = await handle_cache_lookup_async(cache_kwargs)
68
+ if cached_result:
69
+ return cached_result
122
70
 
123
- Raises:
124
- OCRError: If OCR processing fails.
125
- """
126
- import numpy as np # noqa: PLC0415
71
+ try:
72
+ await self._init_paddle_ocr(**kwargs)
127
73
 
128
- await self._init_paddle_ocr(**kwargs)
74
+ if image.mode != "RGB":
75
+ image = image.convert("RGB")
129
76
 
130
- if image.mode != "RGB":
131
- image = image.convert("RGB")
77
+ image_np = np.array(image)
78
+ use_textline_orientation = kwargs.get("use_textline_orientation", kwargs.get("use_angle_cls", True))
79
+ result = await run_sync(self._paddle_ocr.ocr, image_np, cls=use_textline_orientation)
132
80
 
133
- image_np = np.array(image)
134
- try:
135
- result = await run_sync(self._paddle_ocr.ocr, image_np, cls=kwargs.get("use_angle_cls", True))
136
- return self._process_paddle_result(result, image)
81
+ extraction_result = self._process_paddle_result(result, image)
82
+
83
+ if use_cache and cache_kwargs:
84
+ await cache_and_complete_async(extraction_result, cache_kwargs, use_cache)
85
+
86
+ return extraction_result
137
87
  except Exception as e:
88
+ if use_cache and cache_kwargs:
89
+ mark_processing_complete(cache_kwargs)
138
90
  raise OCRError(f"Failed to OCR using PaddleOCR: {e}") from e
139
91
 
140
92
  async def process_file(self, path: Path, **kwargs: Unpack[PaddleOCRConfig]) -> ExtractionResult:
141
- """Asynchronously process a file and extract its text and metadata using PaddleOCR.
93
+ use_cache = kwargs.pop("use_cache", True)
142
94
 
143
- Args:
144
- path: A Path object representing the file to be processed.
145
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
95
+ cache_kwargs = None
96
+ if use_cache:
97
+ file_info = get_file_info(path)
98
+ cache_kwargs = build_cache_kwargs("paddleocr", kwargs, file_info=file_info)
146
99
 
147
- Returns:
148
- ExtractionResult: The extraction result containing text content, mime type, and metadata.
100
+ cached_result = await handle_cache_lookup_async(cache_kwargs)
101
+ if cached_result:
102
+ return cached_result
149
103
 
150
- Raises:
151
- OCRError: If file loading or OCR processing fails.
152
- """
153
- await self._init_paddle_ocr(**kwargs)
154
104
  try:
105
+ await self._init_paddle_ocr(**kwargs)
155
106
  image = await run_sync(Image.open, path)
156
- return await self.process_image(image, **kwargs)
107
+
108
+ kwargs["use_cache"] = False
109
+ extraction_result = await self.process_image(image, **kwargs)
110
+
111
+ if use_cache and cache_kwargs:
112
+ await cache_and_complete_async(extraction_result, cache_kwargs, use_cache)
113
+
114
+ return extraction_result
157
115
  except Exception as e:
116
+ if use_cache and cache_kwargs:
117
+ mark_processing_complete(cache_kwargs)
158
118
  raise OCRError(f"Failed to load or process image using PaddleOCR: {e}") from e
159
119
 
160
120
  @staticmethod
161
121
  def _process_paddle_result(result: list[Any] | Any, image: Image.Image) -> ExtractionResult:
162
- """Process PaddleOCR result into an ExtractionResult with metadata.
163
-
164
- Args:
165
- result: The raw result from PaddleOCR.
166
- image: The original PIL image.
167
-
168
- Returns:
169
- ExtractionResult: The extraction result containing text content, mime type, and metadata.
170
- """
171
122
  text_content = ""
172
123
  confidence_sum = 0
173
124
  confidence_count = 0
@@ -227,11 +178,6 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
227
178
 
228
179
  @classmethod
229
180
  def _is_mkldnn_supported(cls) -> bool:
230
- """Check if the current architecture supports MKL-DNN optimization.
231
-
232
- Returns:
233
- True if MKL-DNN is supported on this architecture.
234
- """
235
181
  system = platform.system().lower()
236
182
  processor = platform.processor().lower()
237
183
  machine = platform.machine().lower()
@@ -246,59 +192,44 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
246
192
 
247
193
  @classmethod
248
194
  async def _init_paddle_ocr(cls, **kwargs: Unpack[PaddleOCRConfig]) -> None:
249
- """Initialize PaddleOCR with the provided configuration.
250
-
251
- Args:
252
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
253
-
254
- Raises:
255
- MissingDependencyError: If PaddleOCR is not installed.
256
- OCRError: If initialization fails.
257
- """
258
195
  if cls._paddle_ocr is not None:
259
196
  return
260
197
 
261
- try:
262
- from paddleocr import PaddleOCR # noqa: PLC0415
263
- except ImportError as e: # pragma: no cover
198
+ if not HAS_PADDLEOCR or PaddleOCR is None:
264
199
  raise MissingDependencyError.create_for_package(
265
200
  dependency_group="paddleocr", functionality="PaddleOCR as an OCR backend", package_name="paddleocr"
266
- ) from e
201
+ )
267
202
 
268
203
  language = cls._validate_language_code(kwargs.pop("language", "en"))
269
204
 
270
- device_info = cls._resolve_device_config(**kwargs)
271
- use_gpu = device_info.device_type == "cuda"
205
+ cls._resolve_device_config(**kwargs)
206
+
207
+ bool(find_spec("paddlepaddle_gpu"))
208
+
209
+ use_angle_cls = kwargs.pop("use_angle_cls", True)
210
+ kwargs.setdefault("use_textline_orientation", use_angle_cls)
211
+
212
+ det_db_thresh = kwargs.pop("det_db_thresh", 0.3)
213
+ det_db_box_thresh = kwargs.pop("det_db_box_thresh", 0.5)
214
+ det_db_unclip_ratio = kwargs.pop("det_db_unclip_ratio", 1.6)
272
215
 
273
- has_gpu_package = bool(find_spec("paddlepaddle_gpu"))
274
- kwargs.setdefault("use_angle_cls", True)
275
- kwargs["use_gpu"] = use_gpu and has_gpu_package
276
- kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported() and not (use_gpu and has_gpu_package))
277
- kwargs.setdefault("det_db_thresh", 0.3)
278
- kwargs.setdefault("det_db_box_thresh", 0.5)
279
- kwargs.setdefault("det_db_unclip_ratio", 1.6)
216
+ kwargs.setdefault("text_det_thresh", det_db_thresh)
217
+ kwargs.setdefault("text_det_box_thresh", det_db_box_thresh)
218
+ kwargs.setdefault("text_det_unclip_ratio", det_db_unclip_ratio)
280
219
 
281
- if device_info.device_type == "cuda" and kwargs.get("gpu_memory_limit"):
282
- kwargs["gpu_mem"] = int(kwargs["gpu_memory_limit"] * 1024)
220
+ kwargs.pop("use_gpu", None)
221
+ kwargs.pop("gpu_mem", None)
222
+ kwargs.pop("gpu_memory_limit", None)
223
+
224
+ kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported())
283
225
 
284
226
  try:
285
- cls._paddle_ocr = await run_sync(PaddleOCR, lang=language, show_log=False, **kwargs)
227
+ cls._paddle_ocr = await run_sync(PaddleOCR, lang=language, **kwargs)
286
228
  except Exception as e:
287
229
  raise OCRError(f"Failed to initialize PaddleOCR: {e}") from e
288
230
 
289
231
  @classmethod
290
232
  def _resolve_device_config(cls, **kwargs: Unpack[PaddleOCRConfig]) -> DeviceInfo:
291
- """Resolve device configuration with backward compatibility.
292
-
293
- Args:
294
- **kwargs: Configuration parameters including device settings.
295
-
296
- Returns:
297
- DeviceInfo object for the selected device.
298
-
299
- Raises:
300
- ValidationError: If requested device is not available and fallback is disabled.
301
- """
302
233
  use_gpu = kwargs.get("use_gpu", False)
303
234
  device = kwargs.get("device", "auto")
304
235
  memory_limit = kwargs.get("gpu_memory_limit")
@@ -343,17 +274,6 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
343
274
 
344
275
  @staticmethod
345
276
  def _validate_language_code(lang_code: str) -> str:
346
- """Convert a language code to PaddleOCR format.
347
-
348
- Args:
349
- lang_code: ISO language code or language name
350
-
351
- Raises:
352
- ValidationError: If the language is not supported by PaddleOCR
353
-
354
- Returns:
355
- Language code compatible with PaddleOCR
356
- """
357
277
  normalized = lang_code.lower()
358
278
  if normalized in PADDLEOCR_SUPPORTED_LANGUAGE_CODES:
359
279
  return normalized
@@ -367,90 +287,100 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
367
287
  )
368
288
 
369
289
  def process_image_sync(self, image: Image.Image, **kwargs: Unpack[PaddleOCRConfig]) -> ExtractionResult:
370
- """Synchronously process an image and extract its text and metadata using PaddleOCR.
290
+ use_cache = kwargs.pop("use_cache", True)
371
291
 
372
- Args:
373
- image: An instance of PIL.Image representing the input image.
374
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
292
+ cache_kwargs = None
293
+ if use_cache:
294
+ image_hash = generate_image_hash(image)
295
+ cache_kwargs = build_cache_kwargs("paddleocr", kwargs, image_hash=image_hash)
375
296
 
376
- Returns:
377
- ExtractionResult: The extraction result containing text content, mime type, and metadata.
297
+ cached_result = handle_cache_lookup_sync(cache_kwargs)
298
+ if cached_result:
299
+ return cached_result
378
300
 
379
- Raises:
380
- OCRError: If OCR processing fails.
381
- """
382
- import numpy as np # noqa: PLC0415
301
+ try:
302
+ self._init_paddle_ocr_sync(**kwargs)
383
303
 
384
- self._init_paddle_ocr_sync(**kwargs)
304
+ if image.mode != "RGB":
305
+ image = image.convert("RGB")
385
306
 
386
- if image.mode != "RGB":
387
- image = image.convert("RGB")
307
+ image_np = np.array(image)
308
+ use_textline_orientation = kwargs.get("use_textline_orientation", kwargs.get("use_angle_cls", True))
309
+ result = self._paddle_ocr.ocr(image_np, cls=use_textline_orientation)
388
310
 
389
- image_np = np.array(image)
390
- try:
391
- result = self._paddle_ocr.ocr(image_np, cls=kwargs.get("use_angle_cls", True))
392
- return self._process_paddle_result(result, image)
311
+ extraction_result = self._process_paddle_result(result, image)
312
+
313
+ if use_cache and cache_kwargs:
314
+ cache_and_complete_sync(extraction_result, cache_kwargs, use_cache)
315
+
316
+ return extraction_result
393
317
  except Exception as e:
318
+ if use_cache and cache_kwargs:
319
+ mark_processing_complete(cache_kwargs)
394
320
  raise OCRError(f"Failed to OCR using PaddleOCR: {e}") from e
395
321
 
396
322
  def process_file_sync(self, path: Path, **kwargs: Unpack[PaddleOCRConfig]) -> ExtractionResult:
397
- """Synchronously process a file and extract its text and metadata using PaddleOCR.
323
+ use_cache = kwargs.pop("use_cache", True)
398
324
 
399
- Args:
400
- path: A Path object representing the file to be processed.
401
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
325
+ cache_kwargs = None
326
+ if use_cache:
327
+ file_info = get_file_info(path)
328
+ cache_kwargs = build_cache_kwargs("paddleocr", kwargs, file_info=file_info)
402
329
 
403
- Returns:
404
- ExtractionResult: The extraction result containing text content, mime type, and metadata.
330
+ cached_result = handle_cache_lookup_sync(cache_kwargs)
331
+ if cached_result:
332
+ return cached_result
405
333
 
406
- Raises:
407
- OCRError: If file loading or OCR processing fails.
408
- """
409
- self._init_paddle_ocr_sync(**kwargs)
410
334
  try:
335
+ self._init_paddle_ocr_sync(**kwargs)
411
336
  image = Image.open(path)
412
- return self.process_image_sync(image, **kwargs)
337
+
338
+ kwargs["use_cache"] = False
339
+ extraction_result = self.process_image_sync(image, **kwargs)
340
+
341
+ if use_cache and cache_kwargs:
342
+ cache_and_complete_sync(extraction_result, cache_kwargs, use_cache)
343
+
344
+ return extraction_result
413
345
  except Exception as e:
346
+ if use_cache and cache_kwargs:
347
+ mark_processing_complete(cache_kwargs)
414
348
  raise OCRError(f"Failed to load or process image using PaddleOCR: {e}") from e
415
349
 
416
350
  @classmethod
417
351
  def _init_paddle_ocr_sync(cls, **kwargs: Unpack[PaddleOCRConfig]) -> None:
418
- """Synchronously initialize PaddleOCR with the provided configuration.
419
-
420
- Args:
421
- **kwargs: Configuration parameters for PaddleOCR including language, detection thresholds, etc.
422
-
423
- Raises:
424
- MissingDependencyError: If PaddleOCR is not installed.
425
- OCRError: If initialization fails.
426
- """
427
352
  if cls._paddle_ocr is not None:
428
353
  return
429
354
 
430
- try:
431
- from paddleocr import PaddleOCR # noqa: PLC0415
432
- except ImportError as e: # pragma: no cover
355
+ if not HAS_PADDLEOCR or PaddleOCR is None:
433
356
  raise MissingDependencyError.create_for_package(
434
357
  dependency_group="paddleocr", functionality="PaddleOCR as an OCR backend", package_name="paddleocr"
435
- ) from e
358
+ )
436
359
 
437
360
  language = cls._validate_language_code(kwargs.pop("language", "en"))
438
361
 
439
- device_info = cls._resolve_device_config(**kwargs)
440
- use_gpu = device_info.device_type == "cuda"
362
+ cls._resolve_device_config(**kwargs)
363
+
364
+ bool(find_spec("paddlepaddle_gpu"))
365
+
366
+ use_angle_cls = kwargs.pop("use_angle_cls", True)
367
+ kwargs.setdefault("use_textline_orientation", use_angle_cls)
368
+
369
+ det_db_thresh = kwargs.pop("det_db_thresh", 0.3)
370
+ det_db_box_thresh = kwargs.pop("det_db_box_thresh", 0.5)
371
+ det_db_unclip_ratio = kwargs.pop("det_db_unclip_ratio", 1.6)
372
+
373
+ kwargs.setdefault("text_det_thresh", det_db_thresh)
374
+ kwargs.setdefault("text_det_box_thresh", det_db_box_thresh)
375
+ kwargs.setdefault("text_det_unclip_ratio", det_db_unclip_ratio)
441
376
 
442
- has_gpu_package = bool(find_spec("paddlepaddle_gpu"))
443
- kwargs.setdefault("use_angle_cls", True)
444
- kwargs["use_gpu"] = use_gpu and has_gpu_package
445
- kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported() and not (use_gpu and has_gpu_package))
446
- kwargs.setdefault("det_db_thresh", 0.3)
447
- kwargs.setdefault("det_db_box_thresh", 0.5)
448
- kwargs.setdefault("det_db_unclip_ratio", 1.6)
377
+ kwargs.pop("use_gpu", None)
378
+ kwargs.pop("gpu_mem", None)
379
+ kwargs.pop("gpu_memory_limit", None)
449
380
 
450
- if device_info.device_type == "cuda" and kwargs.get("gpu_memory_limit"):
451
- kwargs["gpu_mem"] = int(kwargs["gpu_memory_limit"] * 1024)
381
+ kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported())
452
382
 
453
383
  try:
454
- cls._paddle_ocr = PaddleOCR(lang=language, show_log=False, **kwargs)
384
+ cls._paddle_ocr = PaddleOCR(lang=language, **kwargs)
455
385
  except Exception as e:
456
386
  raise OCRError(f"Failed to initialize PaddleOCR: {e}") from e