kreuzberg 3.11.0__py3-none-any.whl → 3.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/_ocr/_easyocr.py +8 -1
- kreuzberg/_ocr/_paddleocr.py +2 -1
- {kreuzberg-3.11.0.dist-info → kreuzberg-3.11.1.dist-info}/METADATA +8 -8
- {kreuzberg-3.11.0.dist-info → kreuzberg-3.11.1.dist-info}/RECORD +7 -7
- {kreuzberg-3.11.0.dist-info → kreuzberg-3.11.1.dist-info}/WHEEL +0 -0
- {kreuzberg-3.11.0.dist-info → kreuzberg-3.11.1.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.11.0.dist-info → kreuzberg-3.11.1.dist-info}/licenses/LICENSE +0 -0
kreuzberg/_ocr/_easyocr.py
CHANGED
@@ -4,7 +4,6 @@ import warnings
|
|
4
4
|
from dataclasses import dataclass
|
5
5
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
from PIL import Image
|
9
8
|
|
10
9
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
@@ -188,6 +187,9 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
188
187
|
|
189
188
|
kwargs.pop("language", None)
|
190
189
|
kwargs.pop("use_gpu", None)
|
190
|
+
kwargs.pop("device", None)
|
191
|
+
kwargs.pop("gpu_memory_limit", None)
|
192
|
+
kwargs.pop("fallback_to_cpu", None)
|
191
193
|
|
192
194
|
try:
|
193
195
|
result = await run_sync(
|
@@ -455,11 +457,16 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
455
457
|
Raises:
|
456
458
|
OCRError: If OCR processing fails.
|
457
459
|
"""
|
460
|
+
import numpy as np # noqa: PLC0415
|
461
|
+
|
458
462
|
self._init_easyocr_sync(**kwargs)
|
459
463
|
|
460
464
|
beam_width = kwargs.pop("beam_width")
|
461
465
|
kwargs.pop("language", None)
|
462
466
|
kwargs.pop("use_gpu", None)
|
467
|
+
kwargs.pop("device", None)
|
468
|
+
kwargs.pop("gpu_memory_limit", None)
|
469
|
+
kwargs.pop("fallback_to_cpu", None)
|
463
470
|
|
464
471
|
try:
|
465
472
|
result = self._reader.readtext(
|
kreuzberg/_ocr/_paddleocr.py
CHANGED
@@ -7,7 +7,6 @@ from importlib.util import find_spec
|
|
7
7
|
from pathlib import Path
|
8
8
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
9
9
|
|
10
|
-
import numpy as np
|
11
10
|
from PIL import Image
|
12
11
|
|
13
12
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
@@ -380,6 +379,8 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
|
|
380
379
|
Raises:
|
381
380
|
OCRError: If OCR processing fails.
|
382
381
|
"""
|
382
|
+
import numpy as np # noqa: PLC0415
|
383
|
+
|
383
384
|
self._init_paddle_ocr_sync(**kwargs)
|
384
385
|
|
385
386
|
if image.mode != "RGB":
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.11.0
|
3
|
+
Version: 3.11.1
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -28,13 +28,13 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
28
28
|
Classifier: Topic :: Text Processing :: General
|
29
29
|
Classifier: Typing :: Typed
|
30
30
|
Requires-Python: >=3.10
|
31
|
-
Requires-Dist: anyio>=4.
|
31
|
+
Requires-Dist: anyio>=4.10.0
|
32
32
|
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
34
|
Requires-Dist: html-to-markdown[lxml]>=1.9.0
|
35
|
-
Requires-Dist: mcp>=1.12.
|
35
|
+
Requires-Dist: mcp>=1.12.4
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
|
-
Requires-Dist: playa-pdf>=0.
|
37
|
+
Requires-Dist: playa-pdf>=0.7.0
|
38
38
|
Requires-Dist: psutil>=7.0.0
|
39
39
|
Requires-Dist: pypdfium2==4.30.0
|
40
40
|
Requires-Dist: python-calamine>=0.3.2
|
@@ -50,19 +50,19 @@ Requires-Dist: easyocr>=1.7.2; extra == 'all'
|
|
50
50
|
Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
|
51
51
|
Requires-Dist: gmft>=0.4.2; extra == 'all'
|
52
52
|
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
53
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.
|
53
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
|
54
54
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
55
55
|
Requires-Dist: paddleocr>=3.1.0; extra == 'all'
|
56
56
|
Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
|
57
57
|
Requires-Dist: pandas>=2.3.1; extra == 'all'
|
58
|
-
Requires-Dist: playa-pdf[crypto]>=0.
|
58
|
+
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
|
59
59
|
Requires-Dist: rich>=14.1.0; extra == 'all'
|
60
60
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
61
61
|
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
62
62
|
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
63
63
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
|
64
64
|
Provides-Extra: api
|
65
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.
|
65
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
|
66
66
|
Provides-Extra: chunking
|
67
67
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
|
68
68
|
Provides-Extra: cli
|
@@ -70,7 +70,7 @@ Requires-Dist: click>=8.2.1; extra == 'cli'
|
|
70
70
|
Requires-Dist: rich>=14.1.0; extra == 'cli'
|
71
71
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
|
72
72
|
Provides-Extra: crypto
|
73
|
-
Requires-Dist: playa-pdf[crypto]>=0.
|
73
|
+
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
|
74
74
|
Provides-Extra: document-classification
|
75
75
|
Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
|
76
76
|
Requires-Dist: pandas>=2.3.1; extra == 'document-classification'
|
@@ -31,8 +31,8 @@ kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
|
|
31
31
|
kreuzberg/_mcp/server.py,sha256=Dxed80MqZsYCFyYo0QdArpKE4H8DhpKY34fijdzV5uw,8731
|
32
32
|
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
33
|
kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
|
34
|
-
kreuzberg/_ocr/_easyocr.py,sha256=
|
35
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=
|
34
|
+
kreuzberg/_ocr/_easyocr.py,sha256=eU4MA_B_-cvq_IhpCeYUruL_kqcfm8maNZKP7zvVQHI,17512
|
35
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=I7ns6L56a2Ol460Bge6e0hpc2AkkwDepLcpCsABj5Dc,17609
|
36
36
|
kreuzberg/_ocr/_tesseract.py,sha256=teLMH1pBhpcmEXDcyZlv56hYINLGMuaKZ0CQtcu_czQ,31510
|
37
37
|
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
38
|
kreuzberg/_utils/_cache.py,sha256=hYd_a5Ni5VJBE1XU_eN9gvQ5gg0FRsdbRgmJe-OIJHM,15253
|
@@ -47,8 +47,8 @@ kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6
|
|
47
47
|
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
48
48
|
kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
|
49
49
|
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
50
|
-
kreuzberg-3.11.
|
51
|
-
kreuzberg-3.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
-
kreuzberg-3.11.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
-
kreuzberg-3.11.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
-
kreuzberg-3.11.0.dist-info/RECORD,,
|
50
|
+
kreuzberg-3.11.1.dist-info/METADATA,sha256=4b51JDwqoS-gjitz5PEpOlDxZ1-lO2C3BR5X2pec4g0,12136
|
51
|
+
kreuzberg-3.11.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
+
kreuzberg-3.11.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
+
kreuzberg-3.11.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
+
kreuzberg-3.11.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|