kreuzberg 3.1.7__tar.gz → 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/PKG-INFO +7 -15
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/README.md +2 -10
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_presentation.py +1 -1
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_ocr/_easyocr.py +66 -4
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_ocr/_paddleocr.py +80 -3
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_ocr/_tesseract.py +1 -1
- kreuzberg-3.2.0/kreuzberg/_utils/_device.py +373 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg.egg-info/PKG-INFO +7 -15
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg.egg-info/SOURCES.txt +1 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg.egg-info/requires.txt +4 -4
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/pyproject.toml +24 -22
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/LICENSE +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/__init__.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_chunker.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_constants.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/__init__.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_base.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_html.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_image.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_pandoc.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_pdf.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_extractors/_spread_sheet.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_gmft.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_mime_types.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_ocr/__init__.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_ocr/_base.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_playa.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_registry.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_types.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_utils/__init__.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_utils/_string.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_utils/_sync.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/_utils/_tmp.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/exceptions.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/extraction.py +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg/py.typed +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg.egg-info/dependency_links.txt +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/kreuzberg.egg-info/top_level.txt +0 -0
- {kreuzberg-3.1.7 → kreuzberg-3.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.1.7
|
3
|
+
Version: 3.2.0
|
4
4
|
Summary: A text extraction library supporting PDFs, images, office documents and more
|
5
5
|
Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
|
6
6
|
License: MIT
|
@@ -27,8 +27,8 @@ License-File: LICENSE
|
|
27
27
|
Requires-Dist: anyio>=4.9.0
|
28
28
|
Requires-Dist: charset-normalizer>=3.4.2
|
29
29
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < "3.11"
|
30
|
-
Requires-Dist: html-to-markdown>=1.
|
31
|
-
Requires-Dist: playa-pdf>=0.
|
30
|
+
Requires-Dist: html-to-markdown>=1.4.0
|
31
|
+
Requires-Dist: playa-pdf>=0.6.1
|
32
32
|
Requires-Dist: pypdfium2==4.30.0
|
33
33
|
Requires-Dist: python-calamine>=0.3.2
|
34
34
|
Requires-Dist: python-pptx>=1.0.2
|
@@ -36,7 +36,7 @@ Requires-Dist: typing-extensions>=4.14.0; python_version < "3.12"
|
|
36
36
|
Provides-Extra: all
|
37
37
|
Requires-Dist: easyocr>=1.7.2; extra == "all"
|
38
38
|
Requires-Dist: gmft>=0.4.1; extra == "all"
|
39
|
-
Requires-Dist: paddleocr>=3.0.
|
39
|
+
Requires-Dist: paddleocr>=3.0.2; extra == "all"
|
40
40
|
Requires-Dist: paddlepaddle>=3.0.0; extra == "all"
|
41
41
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == "all"
|
42
42
|
Requires-Dist: setuptools>=80.9.0; extra == "all"
|
@@ -47,7 +47,7 @@ Requires-Dist: easyocr>=1.7.2; extra == "easyocr"
|
|
47
47
|
Provides-Extra: gmft
|
48
48
|
Requires-Dist: gmft>=0.4.1; extra == "gmft"
|
49
49
|
Provides-Extra: paddleocr
|
50
|
-
Requires-Dist: paddleocr>=3.0.
|
50
|
+
Requires-Dist: paddleocr>=3.0.2; extra == "paddleocr"
|
51
51
|
Requires-Dist: paddlepaddle>=3.0.0; extra == "paddleocr"
|
52
52
|
Requires-Dist: setuptools>=80.9.0; extra == "paddleocr"
|
53
53
|
Dynamic: license-file
|
@@ -157,17 +157,9 @@ Kreuzberg supports multiple OCR engines:
|
|
157
157
|
|
158
158
|
For comparison and selection guidance, see the [OCR Backends](https://goldziher.github.io/kreuzberg/user-guide/ocr-backends/) documentation.
|
159
159
|
|
160
|
-
##
|
160
|
+
## Contributing
|
161
161
|
|
162
|
-
|
163
|
-
|
164
|
-
### Local Development
|
165
|
-
|
166
|
-
- Clone the repo
|
167
|
-
- Install the system dependencies
|
168
|
-
- Install the full dependencies with `uv sync`
|
169
|
-
- Install the pre-commit hooks with: `pre-commit install && pre-commit install --hook-type commit-msg`
|
170
|
-
- Make your changes and submit a PR
|
162
|
+
We welcome contributions! Please see our [Contributing Guide](docs/contributing.md) for details on setting up your development environment and submitting pull requests.
|
171
163
|
|
172
164
|
## License
|
173
165
|
|
@@ -103,17 +103,9 @@ Kreuzberg supports multiple OCR engines:
|
|
103
103
|
|
104
104
|
For comparison and selection guidance, see the [OCR Backends](https://goldziher.github.io/kreuzberg/user-guide/ocr-backends/) documentation.
|
105
105
|
|
106
|
-
##
|
106
|
+
## Contributing
|
107
107
|
|
108
|
-
|
109
|
-
|
110
|
-
### Local Development
|
111
|
-
|
112
|
-
- Clone the repo
|
113
|
-
- Install the system dependencies
|
114
|
-
- Install the full dependencies with `uv sync`
|
115
|
-
- Install the pre-commit hooks with: `pre-commit install && pre-commit install --hook-type commit-msg`
|
116
|
-
- Make your changes and submit a PR
|
108
|
+
We welcome contributions! Please see our [Contributing Guide](docs/contributing.md) for details on setting up your development environment and submitting pull requests.
|
117
109
|
|
118
110
|
## License
|
119
111
|
|
@@ -202,7 +202,7 @@ class PresentationExtractor(Extractor):
|
|
202
202
|
("keywords", "keywords"),
|
203
203
|
("modified_by", "last_modified_by"),
|
204
204
|
("modified_at", "modified"),
|
205
|
-
("version", "revision"), # if version and revision are given, version overwrites
|
205
|
+
("version", "revision"), # if version and revision are given, version overwrites
|
206
206
|
("subject", "subject"),
|
207
207
|
("title", "title"),
|
208
208
|
("version", "version"),
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import warnings
|
3
4
|
from dataclasses import dataclass
|
4
5
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
5
6
|
|
@@ -8,6 +9,7 @@ from PIL import Image
|
|
8
9
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
9
10
|
from kreuzberg._ocr._base import OCRBackend
|
10
11
|
from kreuzberg._types import ExtractionResult, Metadata
|
12
|
+
from kreuzberg._utils._device import DeviceInfo, DeviceType, validate_device_request
|
11
13
|
from kreuzberg._utils._string import normalize_spaces
|
12
14
|
from kreuzberg._utils._sync import run_sync
|
13
15
|
from kreuzberg.exceptions import MissingDependencyError, OCRError, ValidationError
|
@@ -144,7 +146,13 @@ class EasyOCRConfig:
|
|
144
146
|
text_threshold: float = 0.7
|
145
147
|
"""Text confidence threshold."""
|
146
148
|
use_gpu: bool = False
|
147
|
-
"""Whether to use GPU for inference."""
|
149
|
+
"""Whether to use GPU for inference. DEPRECATED: Use 'device' parameter instead."""
|
150
|
+
device: DeviceType = "auto"
|
151
|
+
"""Device to use for inference. Options: 'cpu', 'cuda', 'mps', 'auto'."""
|
152
|
+
gpu_memory_limit: float | None = None
|
153
|
+
"""Maximum GPU memory to use in GB. None for no limit."""
|
154
|
+
fallback_to_cpu: bool = True
|
155
|
+
"""Whether to fallback to CPU if requested device is unavailable."""
|
148
156
|
width_ths: float = 0.5
|
149
157
|
"""Maximum horizontal distance for merging boxes."""
|
150
158
|
x_ths: float = 1.0
|
@@ -336,8 +344,11 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
336
344
|
) from e
|
337
345
|
|
338
346
|
languages = cls._validate_language_code(kwargs.pop("language", "en"))
|
339
|
-
|
340
|
-
|
347
|
+
|
348
|
+
# Handle device selection with backward compatibility
|
349
|
+
device_info = cls._resolve_device_config(**kwargs)
|
350
|
+
use_gpu = device_info.device_type in ("cuda", "mps")
|
351
|
+
|
341
352
|
kwargs.setdefault("detector", True)
|
342
353
|
kwargs.setdefault("recognizer", True)
|
343
354
|
kwargs.setdefault("download_enabled", True)
|
@@ -347,12 +358,63 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
347
358
|
cls._reader = await run_sync(
|
348
359
|
easyocr.Reader,
|
349
360
|
languages,
|
350
|
-
gpu=
|
361
|
+
gpu=use_gpu,
|
351
362
|
verbose=False,
|
352
363
|
)
|
353
364
|
except Exception as e:
|
354
365
|
raise OCRError(f"Failed to initialize EasyOCR: {e}") from e
|
355
366
|
|
367
|
+
@classmethod
|
368
|
+
def _resolve_device_config(cls, **kwargs: Unpack[EasyOCRConfig]) -> DeviceInfo:
|
369
|
+
"""Resolve device configuration with backward compatibility.
|
370
|
+
|
371
|
+
Args:
|
372
|
+
**kwargs: Configuration parameters including device settings.
|
373
|
+
|
374
|
+
Returns:
|
375
|
+
DeviceInfo object for the selected device.
|
376
|
+
|
377
|
+
Raises:
|
378
|
+
ValidationError: If requested device is not available and fallback is disabled.
|
379
|
+
"""
|
380
|
+
# Handle deprecated use_gpu parameter
|
381
|
+
use_gpu = kwargs.get("use_gpu", False)
|
382
|
+
device = kwargs.get("device", "auto")
|
383
|
+
memory_limit = kwargs.get("gpu_memory_limit")
|
384
|
+
fallback_to_cpu = kwargs.get("fallback_to_cpu", True)
|
385
|
+
|
386
|
+
# Check for deprecated parameter usage
|
387
|
+
if use_gpu and device == "auto":
|
388
|
+
warnings.warn(
|
389
|
+
"The 'use_gpu' parameter is deprecated and will be removed in a future version. "
|
390
|
+
"Use 'device=\"cuda\"' or 'device=\"auto\"' instead.",
|
391
|
+
DeprecationWarning,
|
392
|
+
stacklevel=4,
|
393
|
+
)
|
394
|
+
# Convert deprecated use_gpu=True to device="auto"
|
395
|
+
device = "auto" if use_gpu else "cpu"
|
396
|
+
elif use_gpu and device != "auto":
|
397
|
+
warnings.warn(
|
398
|
+
"Both 'use_gpu' and 'device' parameters specified. The 'use_gpu' parameter is deprecated. "
|
399
|
+
"Using 'device' parameter value.",
|
400
|
+
DeprecationWarning,
|
401
|
+
stacklevel=4,
|
402
|
+
)
|
403
|
+
|
404
|
+
# Validate and get device info
|
405
|
+
try:
|
406
|
+
return validate_device_request(
|
407
|
+
device,
|
408
|
+
"EasyOCR",
|
409
|
+
memory_limit=memory_limit,
|
410
|
+
fallback_to_cpu=fallback_to_cpu,
|
411
|
+
)
|
412
|
+
except ValidationError:
|
413
|
+
# If device validation fails and we're using deprecated use_gpu=False, fallback to CPU
|
414
|
+
if not use_gpu and device == "cpu":
|
415
|
+
return DeviceInfo(device_type="cpu", name="CPU")
|
416
|
+
raise
|
417
|
+
|
356
418
|
@staticmethod
|
357
419
|
def _validate_language_code(language_codes: str | list[str]) -> list[str]:
|
358
420
|
"""Validate and normalize provided language codes.
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import platform
|
4
|
+
import warnings
|
4
5
|
from dataclasses import dataclass
|
5
6
|
from importlib.util import find_spec
|
6
7
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
@@ -10,6 +11,7 @@ from PIL import Image
|
|
10
11
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
11
12
|
from kreuzberg._ocr._base import OCRBackend
|
12
13
|
from kreuzberg._types import ExtractionResult, Metadata
|
14
|
+
from kreuzberg._utils._device import DeviceInfo, DeviceType, validate_device_request
|
13
15
|
from kreuzberg._utils._string import normalize_spaces
|
14
16
|
from kreuzberg._utils._sync import run_sync
|
15
17
|
from kreuzberg.exceptions import MissingDependencyError, OCRError, ValidationError
|
@@ -91,7 +93,13 @@ class PaddleOCRConfig:
|
|
91
93
|
use_angle_cls: bool = True
|
92
94
|
"""Whether to use text orientation classification model."""
|
93
95
|
use_gpu: bool = False
|
94
|
-
"""Whether to use GPU for inference.
|
96
|
+
"""Whether to use GPU for inference. DEPRECATED: Use 'device' parameter instead."""
|
97
|
+
device: DeviceType = "auto"
|
98
|
+
"""Device to use for inference. Options: 'cpu', 'cuda', 'auto'. Note: MPS not supported by PaddlePaddle."""
|
99
|
+
gpu_memory_limit: float | None = None
|
100
|
+
"""Maximum GPU memory to use in GB. None for no limit."""
|
101
|
+
fallback_to_cpu: bool = True
|
102
|
+
"""Whether to fallback to CPU if requested device is unavailable."""
|
95
103
|
use_space_char: bool = True
|
96
104
|
"""Whether to recognize spaces."""
|
97
105
|
use_zero_copy_run: bool = False
|
@@ -248,19 +256,88 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
|
|
248
256
|
) from e
|
249
257
|
|
250
258
|
language = cls._validate_language_code(kwargs.pop("language", "en"))
|
259
|
+
|
260
|
+
# Handle device selection with backward compatibility
|
261
|
+
device_info = cls._resolve_device_config(**kwargs)
|
262
|
+
use_gpu = device_info.device_type == "cuda"
|
263
|
+
|
251
264
|
has_gpu_package = bool(find_spec("paddlepaddle_gpu"))
|
252
265
|
kwargs.setdefault("use_angle_cls", True)
|
253
|
-
kwargs
|
254
|
-
kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported() and not has_gpu_package)
|
266
|
+
kwargs["use_gpu"] = use_gpu and has_gpu_package
|
267
|
+
kwargs.setdefault("enable_mkldnn", cls._is_mkldnn_supported() and not (use_gpu and has_gpu_package))
|
255
268
|
kwargs.setdefault("det_db_thresh", 0.3)
|
256
269
|
kwargs.setdefault("det_db_box_thresh", 0.5)
|
257
270
|
kwargs.setdefault("det_db_unclip_ratio", 1.6)
|
258
271
|
|
272
|
+
# Set GPU memory limit if specified
|
273
|
+
if device_info.device_type == "cuda" and kwargs.get("gpu_memory_limit"):
|
274
|
+
kwargs["gpu_mem"] = int(kwargs["gpu_memory_limit"] * 1024) # Convert GB to MB
|
275
|
+
|
259
276
|
try:
|
260
277
|
cls._paddle_ocr = await run_sync(PaddleOCR, lang=language, show_log=False, **kwargs)
|
261
278
|
except Exception as e:
|
262
279
|
raise OCRError(f"Failed to initialize PaddleOCR: {e}") from e
|
263
280
|
|
281
|
+
@classmethod
|
282
|
+
def _resolve_device_config(cls, **kwargs: Unpack[PaddleOCRConfig]) -> DeviceInfo:
|
283
|
+
"""Resolve device configuration with backward compatibility.
|
284
|
+
|
285
|
+
Args:
|
286
|
+
**kwargs: Configuration parameters including device settings.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
DeviceInfo object for the selected device.
|
290
|
+
|
291
|
+
Raises:
|
292
|
+
ValidationError: If requested device is not available and fallback is disabled.
|
293
|
+
"""
|
294
|
+
# Handle deprecated use_gpu parameter
|
295
|
+
use_gpu = kwargs.get("use_gpu", False)
|
296
|
+
device = kwargs.get("device", "auto")
|
297
|
+
memory_limit = kwargs.get("gpu_memory_limit")
|
298
|
+
fallback_to_cpu = kwargs.get("fallback_to_cpu", True)
|
299
|
+
|
300
|
+
# Check for deprecated parameter usage
|
301
|
+
if use_gpu and device == "auto":
|
302
|
+
warnings.warn(
|
303
|
+
"The 'use_gpu' parameter is deprecated and will be removed in a future version. "
|
304
|
+
"Use 'device=\"cuda\"' or 'device=\"auto\"' instead.",
|
305
|
+
DeprecationWarning,
|
306
|
+
stacklevel=4,
|
307
|
+
)
|
308
|
+
# Convert deprecated use_gpu=True to device="auto"
|
309
|
+
device = "auto" if use_gpu else "cpu"
|
310
|
+
elif use_gpu and device != "auto":
|
311
|
+
warnings.warn(
|
312
|
+
"Both 'use_gpu' and 'device' parameters specified. The 'use_gpu' parameter is deprecated. "
|
313
|
+
"Using 'device' parameter value.",
|
314
|
+
DeprecationWarning,
|
315
|
+
stacklevel=4,
|
316
|
+
)
|
317
|
+
|
318
|
+
# PaddlePaddle doesn't support MPS, so warn if requested
|
319
|
+
if device == "mps":
|
320
|
+
warnings.warn(
|
321
|
+
"PaddlePaddle does not support MPS (Apple Silicon) acceleration. Falling back to CPU.",
|
322
|
+
UserWarning,
|
323
|
+
stacklevel=4,
|
324
|
+
)
|
325
|
+
device = "cpu"
|
326
|
+
|
327
|
+
# Validate and get device info
|
328
|
+
try:
|
329
|
+
return validate_device_request(
|
330
|
+
device,
|
331
|
+
"PaddleOCR",
|
332
|
+
memory_limit=memory_limit,
|
333
|
+
fallback_to_cpu=fallback_to_cpu,
|
334
|
+
)
|
335
|
+
except ValidationError:
|
336
|
+
# If device validation fails and we're using deprecated use_gpu=False, fallback to CPU
|
337
|
+
if not use_gpu and device == "cpu":
|
338
|
+
return DeviceInfo(device_type="cpu", name="CPU")
|
339
|
+
raise
|
340
|
+
|
264
341
|
@staticmethod
|
265
342
|
def _validate_language_code(lang_code: str) -> str:
|
266
343
|
"""Convert a language code to PaddleOCR format.
|
@@ -264,7 +264,7 @@ class TesseractBackend(OCRBackend[TesseractConfig]):
|
|
264
264
|
|
265
265
|
env: dict[str, Any] | None = None
|
266
266
|
if sys.platform.startswith("linux"):
|
267
|
-
# we have to prevent multithreading this way otherwise we will get deadlocks
|
267
|
+
# we have to prevent multithreading this way otherwise we will get deadlocks
|
268
268
|
env = {"OMP_THREAD_LIMIT": "1"}
|
269
269
|
|
270
270
|
result = await run_process(command, env=env)
|
@@ -0,0 +1,373 @@
|
|
1
|
+
"""Device detection and management utilities for GPU acceleration."""
|
2
|
+
# ruff: noqa: BLE001
|
3
|
+
|
4
|
+
from __future__ import annotations
|
5
|
+
|
6
|
+
import warnings
|
7
|
+
from dataclasses import dataclass
|
8
|
+
from typing import Literal
|
9
|
+
|
10
|
+
from kreuzberg.exceptions import ValidationError
|
11
|
+
|
12
|
+
DeviceType = Literal["cpu", "cuda", "mps", "auto"]
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass(frozen=True)
|
16
|
+
class DeviceInfo:
|
17
|
+
"""Information about a compute device."""
|
18
|
+
|
19
|
+
device_type: Literal["cpu", "cuda", "mps"]
|
20
|
+
"""The type of device."""
|
21
|
+
device_id: int | None = None
|
22
|
+
"""Device ID for multi-GPU systems. None for CPU or single GPU."""
|
23
|
+
memory_total: float | None = None
|
24
|
+
"""Total memory in GB. None if unknown."""
|
25
|
+
memory_available: float | None = None
|
26
|
+
"""Available memory in GB. None if unknown."""
|
27
|
+
name: str | None = None
|
28
|
+
"""Human-readable device name."""
|
29
|
+
|
30
|
+
|
31
|
+
def detect_available_devices() -> list[DeviceInfo]:
|
32
|
+
"""Detect all available compute devices.
|
33
|
+
|
34
|
+
Returns:
|
35
|
+
List of available devices, with the most preferred device first.
|
36
|
+
"""
|
37
|
+
devices: list[DeviceInfo] = []
|
38
|
+
|
39
|
+
# Always include CPU as fallback
|
40
|
+
devices.append(
|
41
|
+
DeviceInfo(
|
42
|
+
device_type="cpu",
|
43
|
+
name="CPU",
|
44
|
+
)
|
45
|
+
)
|
46
|
+
|
47
|
+
# Check for CUDA (NVIDIA GPUs)
|
48
|
+
if _is_cuda_available():
|
49
|
+
cuda_devices = _get_cuda_devices()
|
50
|
+
devices.extend(cuda_devices)
|
51
|
+
|
52
|
+
# Check for MPS (Apple Silicon)
|
53
|
+
if _is_mps_available():
|
54
|
+
mps_device = _get_mps_device()
|
55
|
+
if mps_device:
|
56
|
+
devices.append(mps_device)
|
57
|
+
|
58
|
+
# Reorder to put GPU devices first
|
59
|
+
gpu_devices = [d for d in devices if d.device_type != "cpu"]
|
60
|
+
cpu_devices = [d for d in devices if d.device_type == "cpu"]
|
61
|
+
|
62
|
+
return gpu_devices + cpu_devices
|
63
|
+
|
64
|
+
|
65
|
+
def get_optimal_device() -> DeviceInfo:
|
66
|
+
"""Get the optimal device for OCR processing.
|
67
|
+
|
68
|
+
Returns:
|
69
|
+
The best available device, preferring GPU over CPU.
|
70
|
+
"""
|
71
|
+
devices = detect_available_devices()
|
72
|
+
return devices[0] if devices else DeviceInfo(device_type="cpu", name="CPU")
|
73
|
+
|
74
|
+
|
75
|
+
def validate_device_request(
|
76
|
+
requested: DeviceType,
|
77
|
+
backend: str,
|
78
|
+
*,
|
79
|
+
memory_limit: float | None = None,
|
80
|
+
fallback_to_cpu: bool = True,
|
81
|
+
) -> DeviceInfo:
|
82
|
+
"""Validate and resolve a device request.
|
83
|
+
|
84
|
+
Args:
|
85
|
+
requested: The requested device type.
|
86
|
+
backend: Name of the OCR backend requesting the device.
|
87
|
+
memory_limit: Optional memory limit in GB.
|
88
|
+
fallback_to_cpu: Whether to fallback to CPU if requested device unavailable.
|
89
|
+
|
90
|
+
Returns:
|
91
|
+
A validated DeviceInfo object.
|
92
|
+
|
93
|
+
Raises:
|
94
|
+
ValidationError: If the requested device is not available and fallback is disabled.
|
95
|
+
"""
|
96
|
+
available_devices = detect_available_devices()
|
97
|
+
|
98
|
+
# Handle auto device selection
|
99
|
+
if requested == "auto":
|
100
|
+
device = get_optimal_device()
|
101
|
+
if memory_limit is not None:
|
102
|
+
_validate_memory_limit(device, memory_limit)
|
103
|
+
return device
|
104
|
+
|
105
|
+
# Find requested device
|
106
|
+
matching_devices = [d for d in available_devices if d.device_type == requested]
|
107
|
+
|
108
|
+
if not matching_devices:
|
109
|
+
if fallback_to_cpu and requested != "cpu":
|
110
|
+
warnings.warn(
|
111
|
+
f"Requested device '{requested}' not available for {backend}. Falling back to CPU.",
|
112
|
+
UserWarning,
|
113
|
+
stacklevel=2,
|
114
|
+
)
|
115
|
+
cpu_device = next((d for d in available_devices if d.device_type == "cpu"), None)
|
116
|
+
if cpu_device:
|
117
|
+
return cpu_device
|
118
|
+
|
119
|
+
raise ValidationError(
|
120
|
+
f"Requested device '{requested}' is not available for {backend}",
|
121
|
+
context={
|
122
|
+
"requested_device": requested,
|
123
|
+
"backend": backend,
|
124
|
+
"available_devices": [d.device_type for d in available_devices],
|
125
|
+
},
|
126
|
+
)
|
127
|
+
|
128
|
+
# Use the first matching device (typically the best one)
|
129
|
+
device = matching_devices[0]
|
130
|
+
|
131
|
+
# Validate memory limit if specified
|
132
|
+
if memory_limit is not None:
|
133
|
+
_validate_memory_limit(device, memory_limit)
|
134
|
+
|
135
|
+
return device
|
136
|
+
|
137
|
+
|
138
|
+
def get_device_memory_info(device: DeviceInfo) -> tuple[float | None, float | None]:
|
139
|
+
"""Get memory information for a device.
|
140
|
+
|
141
|
+
Args:
|
142
|
+
device: The device to query.
|
143
|
+
|
144
|
+
Returns:
|
145
|
+
Tuple of (total_memory_gb, available_memory_gb). None values if unknown.
|
146
|
+
"""
|
147
|
+
if device.device_type == "cpu":
|
148
|
+
return None, None
|
149
|
+
|
150
|
+
if device.device_type == "cuda":
|
151
|
+
return _get_cuda_memory_info(device.device_id or 0)
|
152
|
+
|
153
|
+
if device.device_type == "mps":
|
154
|
+
return _get_mps_memory_info()
|
155
|
+
|
156
|
+
return None, None
|
157
|
+
|
158
|
+
|
159
|
+
def _is_cuda_available() -> bool:
|
160
|
+
"""Check if CUDA is available."""
|
161
|
+
try:
|
162
|
+
import torch
|
163
|
+
|
164
|
+
return torch.cuda.is_available()
|
165
|
+
except ImportError:
|
166
|
+
return False
|
167
|
+
|
168
|
+
|
169
|
+
def _is_mps_available() -> bool:
|
170
|
+
"""Check if MPS (Apple Silicon) is available."""
|
171
|
+
try:
|
172
|
+
import torch
|
173
|
+
|
174
|
+
return torch.backends.mps.is_available()
|
175
|
+
except ImportError:
|
176
|
+
return False
|
177
|
+
|
178
|
+
|
179
|
+
def _get_cuda_devices() -> list[DeviceInfo]:
|
180
|
+
"""Get information about available CUDA devices."""
|
181
|
+
devices: list[DeviceInfo] = []
|
182
|
+
|
183
|
+
try:
|
184
|
+
import torch
|
185
|
+
|
186
|
+
if not torch.cuda.is_available():
|
187
|
+
return devices
|
188
|
+
|
189
|
+
for i in range(torch.cuda.device_count()):
|
190
|
+
props = torch.cuda.get_device_properties(i)
|
191
|
+
total_memory = props.total_memory / (1024**3) # Convert to GB
|
192
|
+
|
193
|
+
# Get available memory
|
194
|
+
torch.cuda.set_device(i)
|
195
|
+
available_memory = torch.cuda.get_device_properties(i).total_memory / (1024**3)
|
196
|
+
try:
|
197
|
+
# Try to get current memory usage
|
198
|
+
allocated = torch.cuda.memory_allocated(i) / (1024**3)
|
199
|
+
available_memory = total_memory - allocated
|
200
|
+
except Exception:
|
201
|
+
# Fallback to total memory if we can't get allocation info
|
202
|
+
available_memory = total_memory
|
203
|
+
|
204
|
+
devices.append(
|
205
|
+
DeviceInfo(
|
206
|
+
device_type="cuda",
|
207
|
+
device_id=i,
|
208
|
+
memory_total=total_memory,
|
209
|
+
memory_available=available_memory,
|
210
|
+
name=props.name,
|
211
|
+
)
|
212
|
+
)
|
213
|
+
|
214
|
+
except ImportError:
|
215
|
+
pass
|
216
|
+
|
217
|
+
return devices
|
218
|
+
|
219
|
+
|
220
|
+
def _get_mps_device() -> DeviceInfo | None:
|
221
|
+
"""Get information about the MPS device."""
|
222
|
+
try:
|
223
|
+
import torch
|
224
|
+
|
225
|
+
if not torch.backends.mps.is_available():
|
226
|
+
return None
|
227
|
+
|
228
|
+
# MPS doesn't provide detailed memory info
|
229
|
+
return DeviceInfo(
|
230
|
+
device_type="mps",
|
231
|
+
name="Apple Silicon GPU (MPS)",
|
232
|
+
)
|
233
|
+
|
234
|
+
except ImportError:
|
235
|
+
return None
|
236
|
+
|
237
|
+
|
238
|
+
def _get_cuda_memory_info(device_id: int) -> tuple[float | None, float | None]:
|
239
|
+
"""Get CUDA memory information for a specific device."""
|
240
|
+
try:
|
241
|
+
import torch
|
242
|
+
|
243
|
+
if not torch.cuda.is_available():
|
244
|
+
return None, None
|
245
|
+
|
246
|
+
props = torch.cuda.get_device_properties(device_id)
|
247
|
+
total_memory = props.total_memory / (1024**3)
|
248
|
+
|
249
|
+
try:
|
250
|
+
allocated = torch.cuda.memory_allocated(device_id) / (1024**3)
|
251
|
+
available_memory = total_memory - allocated
|
252
|
+
except Exception:
|
253
|
+
available_memory = total_memory
|
254
|
+
|
255
|
+
return total_memory, available_memory
|
256
|
+
|
257
|
+
except ImportError:
|
258
|
+
return None, None
|
259
|
+
|
260
|
+
|
261
|
+
def _get_mps_memory_info() -> tuple[float | None, float | None]:
|
262
|
+
"""Get MPS memory information."""
|
263
|
+
# MPS doesn't provide detailed memory info through PyTorch
|
264
|
+
# We could potentially use system calls but that's platform-specific
|
265
|
+
return None, None
|
266
|
+
|
267
|
+
|
268
|
+
def _validate_memory_limit(device: DeviceInfo, memory_limit: float) -> None:
|
269
|
+
"""Validate that a device has enough memory for the requested limit.
|
270
|
+
|
271
|
+
Args:
|
272
|
+
device: The device to validate.
|
273
|
+
memory_limit: Required memory in GB.
|
274
|
+
|
275
|
+
Raises:
|
276
|
+
ValidationError: If the device doesn't have enough memory.
|
277
|
+
"""
|
278
|
+
if device.device_type == "cpu":
|
279
|
+
# CPU memory validation is complex and OS-dependent, skip for now
|
280
|
+
return
|
281
|
+
|
282
|
+
total_memory, available_memory = get_device_memory_info(device)
|
283
|
+
|
284
|
+
if total_memory is not None and memory_limit > total_memory:
|
285
|
+
raise ValidationError(
|
286
|
+
f"Requested memory limit ({memory_limit:.1f}GB) exceeds device capacity ({total_memory:.1f}GB)",
|
287
|
+
context={
|
288
|
+
"device": device.device_type,
|
289
|
+
"device_name": device.name,
|
290
|
+
"requested_memory": memory_limit,
|
291
|
+
"total_memory": total_memory,
|
292
|
+
"available_memory": available_memory,
|
293
|
+
},
|
294
|
+
)
|
295
|
+
|
296
|
+
if available_memory is not None and memory_limit > available_memory:
|
297
|
+
warnings.warn(
|
298
|
+
f"Requested memory limit ({memory_limit:.1f}GB) exceeds available memory "
|
299
|
+
f"({available_memory:.1f}GB) on {device.name or device.device_type}",
|
300
|
+
UserWarning,
|
301
|
+
stacklevel=3,
|
302
|
+
)
|
303
|
+
|
304
|
+
|
305
|
+
def is_backend_gpu_compatible(backend: str) -> bool:
|
306
|
+
"""Check if an OCR backend supports GPU acceleration.
|
307
|
+
|
308
|
+
Args:
|
309
|
+
backend: Name of the OCR backend.
|
310
|
+
|
311
|
+
Returns:
|
312
|
+
True if the backend supports GPU acceleration.
|
313
|
+
"""
|
314
|
+
# EasyOCR and PaddleOCR support GPU, Tesseract does not
|
315
|
+
return backend.lower() in ("easyocr", "paddleocr")
|
316
|
+
|
317
|
+
|
318
|
+
def get_recommended_batch_size(device: DeviceInfo, input_size_mb: float = 10.0) -> int:
|
319
|
+
"""Get recommended batch size for OCR processing.
|
320
|
+
|
321
|
+
Args:
|
322
|
+
device: The device to optimize for.
|
323
|
+
input_size_mb: Estimated input size per item in MB.
|
324
|
+
|
325
|
+
Returns:
|
326
|
+
Recommended batch size.
|
327
|
+
"""
|
328
|
+
if device.device_type == "cpu":
|
329
|
+
# Conservative batch size for CPU
|
330
|
+
return 1
|
331
|
+
|
332
|
+
# For GPU devices, estimate based on available memory
|
333
|
+
_, available_memory = get_device_memory_info(device)
|
334
|
+
|
335
|
+
if available_memory is None:
|
336
|
+
# Conservative default for unknown memory
|
337
|
+
return 4
|
338
|
+
|
339
|
+
# Reserve some memory for model and intermediate calculations
|
340
|
+
# Use approximately 50% of available memory for batching
|
341
|
+
usable_memory_gb = available_memory * 0.5
|
342
|
+
usable_memory_mb = usable_memory_gb * 1024
|
343
|
+
|
344
|
+
# Estimate batch size (conservative)
|
345
|
+
estimated_batch_size = max(1, int(usable_memory_mb / (input_size_mb * 4)))
|
346
|
+
|
347
|
+
# Cap at reasonable limits
|
348
|
+
return min(estimated_batch_size, 32)
|
349
|
+
|
350
|
+
|
351
|
+
def cleanup_device_memory(device: DeviceInfo) -> None:
|
352
|
+
"""Clean up device memory.
|
353
|
+
|
354
|
+
Args:
|
355
|
+
device: The device to clean up.
|
356
|
+
"""
|
357
|
+
if device.device_type == "cuda":
|
358
|
+
try:
|
359
|
+
import torch
|
360
|
+
|
361
|
+
if torch.cuda.is_available():
|
362
|
+
torch.cuda.empty_cache()
|
363
|
+
except ImportError:
|
364
|
+
pass
|
365
|
+
|
366
|
+
elif device.device_type == "mps":
|
367
|
+
try:
|
368
|
+
import torch
|
369
|
+
|
370
|
+
if torch.backends.mps.is_available():
|
371
|
+
torch.mps.empty_cache()
|
372
|
+
except (ImportError, AttributeError):
|
373
|
+
pass
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.1.7
|
3
|
+
Version: 3.2.0
|
4
4
|
Summary: A text extraction library supporting PDFs, images, office documents and more
|
5
5
|
Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
|
6
6
|
License: MIT
|
@@ -27,8 +27,8 @@ License-File: LICENSE
|
|
27
27
|
Requires-Dist: anyio>=4.9.0
|
28
28
|
Requires-Dist: charset-normalizer>=3.4.2
|
29
29
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < "3.11"
|
30
|
-
Requires-Dist: html-to-markdown>=1.
|
31
|
-
Requires-Dist: playa-pdf>=0.
|
30
|
+
Requires-Dist: html-to-markdown>=1.4.0
|
31
|
+
Requires-Dist: playa-pdf>=0.6.1
|
32
32
|
Requires-Dist: pypdfium2==4.30.0
|
33
33
|
Requires-Dist: python-calamine>=0.3.2
|
34
34
|
Requires-Dist: python-pptx>=1.0.2
|
@@ -36,7 +36,7 @@ Requires-Dist: typing-extensions>=4.14.0; python_version < "3.12"
|
|
36
36
|
Provides-Extra: all
|
37
37
|
Requires-Dist: easyocr>=1.7.2; extra == "all"
|
38
38
|
Requires-Dist: gmft>=0.4.1; extra == "all"
|
39
|
-
Requires-Dist: paddleocr>=3.0.
|
39
|
+
Requires-Dist: paddleocr>=3.0.2; extra == "all"
|
40
40
|
Requires-Dist: paddlepaddle>=3.0.0; extra == "all"
|
41
41
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == "all"
|
42
42
|
Requires-Dist: setuptools>=80.9.0; extra == "all"
|
@@ -47,7 +47,7 @@ Requires-Dist: easyocr>=1.7.2; extra == "easyocr"
|
|
47
47
|
Provides-Extra: gmft
|
48
48
|
Requires-Dist: gmft>=0.4.1; extra == "gmft"
|
49
49
|
Provides-Extra: paddleocr
|
50
|
-
Requires-Dist: paddleocr>=3.0.
|
50
|
+
Requires-Dist: paddleocr>=3.0.2; extra == "paddleocr"
|
51
51
|
Requires-Dist: paddlepaddle>=3.0.0; extra == "paddleocr"
|
52
52
|
Requires-Dist: setuptools>=80.9.0; extra == "paddleocr"
|
53
53
|
Dynamic: license-file
|
@@ -157,17 +157,9 @@ Kreuzberg supports multiple OCR engines:
|
|
157
157
|
|
158
158
|
For comparison and selection guidance, see the [OCR Backends](https://goldziher.github.io/kreuzberg/user-guide/ocr-backends/) documentation.
|
159
159
|
|
160
|
-
##
|
160
|
+
## Contributing
|
161
161
|
|
162
|
-
|
163
|
-
|
164
|
-
### Local Development
|
165
|
-
|
166
|
-
- Clone the repo
|
167
|
-
- Install the system dependencies
|
168
|
-
- Install the full dependencies with `uv sync`
|
169
|
-
- Install the pre-commit hooks with: `pre-commit install && pre-commit install --hook-type commit-msg`
|
170
|
-
- Make your changes and submit a PR
|
162
|
+
We welcome contributions! Please see our [Contributing Guide](docs/contributing.md) for details on setting up your development environment and submitting pull requests.
|
171
163
|
|
172
164
|
## License
|
173
165
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
anyio>=4.9.0
|
2
2
|
charset-normalizer>=3.4.2
|
3
|
-
html-to-markdown>=1.
|
4
|
-
playa-pdf>=0.
|
3
|
+
html-to-markdown>=1.4.0
|
4
|
+
playa-pdf>=0.6.1
|
5
5
|
pypdfium2==4.30.0
|
6
6
|
python-calamine>=0.3.2
|
7
7
|
python-pptx>=1.0.2
|
@@ -15,7 +15,7 @@ typing-extensions>=4.14.0
|
|
15
15
|
[all]
|
16
16
|
easyocr>=1.7.2
|
17
17
|
gmft>=0.4.1
|
18
|
-
paddleocr>=3.0.
|
18
|
+
paddleocr>=3.0.2
|
19
19
|
paddlepaddle>=3.0.0
|
20
20
|
semantic-text-splitter>=0.27.0
|
21
21
|
setuptools>=80.9.0
|
@@ -30,6 +30,6 @@ easyocr>=1.7.2
|
|
30
30
|
gmft>=0.4.1
|
31
31
|
|
32
32
|
[paddleocr]
|
33
|
-
paddleocr>=3.0.
|
33
|
+
paddleocr>=3.0.2
|
34
34
|
paddlepaddle>=3.0.0
|
35
35
|
setuptools>=80.9.0
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "kreuzberg"
|
3
|
-
version = "3.
|
3
|
+
version = "3.2.0"
|
4
4
|
description = "A text extraction library supporting PDFs, images, office documents and more"
|
5
5
|
readme = "README.md"
|
6
6
|
keywords = [
|
@@ -40,8 +40,8 @@ dependencies = [
|
|
40
40
|
"anyio>=4.9.0",
|
41
41
|
"charset-normalizer>=3.4.2",
|
42
42
|
"exceptiongroup>=1.2.2; python_version<'3.11'",
|
43
|
-
"html-to-markdown>=1.
|
44
|
-
"playa-pdf>=0.
|
43
|
+
"html-to-markdown>=1.4.0",
|
44
|
+
"playa-pdf>=0.6.1", # pinned due to breaking changes in 0.5.0
|
45
45
|
"pypdfium2==4.30.0", # pinned due to bug in 4.30.1, until v5 is stable
|
46
46
|
"python-calamine>=0.3.2",
|
47
47
|
"python-pptx>=1.0.2",
|
@@ -54,7 +54,7 @@ optional-dependencies.all = [
|
|
54
54
|
# gmft
|
55
55
|
"gmft>=0.4.1",
|
56
56
|
# paddle
|
57
|
-
"paddleocr>=3.0.
|
57
|
+
"paddleocr>=3.0.2",
|
58
58
|
"paddlepaddle>=3.0.0",
|
59
59
|
# chunking
|
60
60
|
"semantic-text-splitter>=0.27.0",
|
@@ -70,7 +70,7 @@ optional-dependencies.gmft = [
|
|
70
70
|
"gmft>=0.4.1",
|
71
71
|
]
|
72
72
|
optional-dependencies.paddleocr = [
|
73
|
-
"paddleocr>=3.0.
|
73
|
+
"paddleocr>=3.0.2",
|
74
74
|
"paddlepaddle>=3.0.0",
|
75
75
|
"setuptools>=80.9.0",
|
76
76
|
]
|
@@ -79,13 +79,13 @@ urls.homepage = "https://github.com/Goldziher/kreuzberg"
|
|
79
79
|
[dependency-groups]
|
80
80
|
dev = [
|
81
81
|
"covdefaults>=2.3.0",
|
82
|
-
"mypy>=1.16.
|
82
|
+
"mypy>=1.16.1",
|
83
83
|
"pre-commit>=4.2.0",
|
84
|
-
"pytest>=8.4.
|
85
|
-
"pytest-cov>=6.
|
84
|
+
"pytest>=8.4.1",
|
85
|
+
"pytest-cov>=6.2.1",
|
86
86
|
"pytest-mock>=3.14.0",
|
87
87
|
"pytest-timeout>=2.4.0",
|
88
|
-
"ruff>=0.
|
88
|
+
"ruff>=0.12.0",
|
89
89
|
"trio>=0.30.0",
|
90
90
|
"uv-bump",
|
91
91
|
]
|
@@ -108,19 +108,20 @@ format.docstring-code-line-length = 120
|
|
108
108
|
format.docstring-code-format = true
|
109
109
|
lint.select = [ "ALL" ]
|
110
110
|
lint.ignore = [
|
111
|
-
"ANN401",
|
112
|
-
"COM812",
|
113
|
-
"D100",
|
114
|
-
"D104",
|
115
|
-
"D107",
|
116
|
-
"D205",
|
117
|
-
"E501",
|
118
|
-
"EM",
|
119
|
-
"FBT",
|
120
|
-
"FIX",
|
121
|
-
"ISC001",
|
122
|
-
"
|
123
|
-
"
|
111
|
+
"ANN401", # Dynamically typed `Any` is allowed (used for **kwargs)
|
112
|
+
"COM812", # Conflicts with formatter
|
113
|
+
"D100", # Missing docstring in public module
|
114
|
+
"D104", # Missing docstring in public package
|
115
|
+
"D107", # Missing docstring in __init__
|
116
|
+
"D205", # 1 blank line required between summary line and description
|
117
|
+
"E501", # Line too long, handled by ruff format
|
118
|
+
"EM", # Exception message rules (flake8-errmsg)
|
119
|
+
"FBT", # Boolean-typed positional argument in function definition
|
120
|
+
"FIX", # We allow todo and fixme comments
|
121
|
+
"ISC001", # Conflicts with formatter
|
122
|
+
"PLC0415", # Import should be at top-level (we use conditional imports)
|
123
|
+
"TD", # We allow todo and fixme comments
|
124
|
+
"TRY", # Try except block, rules are too strict
|
124
125
|
]
|
125
126
|
lint.per-file-ignores."tests/**/*.*" = [
|
126
127
|
"ARG001",
|
@@ -128,6 +129,7 @@ lint.per-file-ignores."tests/**/*.*" = [
|
|
128
129
|
"N815",
|
129
130
|
"PD",
|
130
131
|
"PGH003",
|
132
|
+
"PLC", # Disable all PLC rules for tests
|
131
133
|
"PLR0915",
|
132
134
|
"PLR2004",
|
133
135
|
"PT006",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|