kreuzberg 3.10.1__py3-none-any.whl → 3.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/_config.py +18 -14
- kreuzberg/_document_classification.py +1 -1
- kreuzberg/_extractors/_base.py +1 -2
- kreuzberg/_extractors/_image.py +18 -17
- kreuzberg/_extractors/_pdf.py +30 -33
- kreuzberg/_mcp/server.py +1 -1
- kreuzberg/_ocr/_easyocr.py +8 -1
- kreuzberg/_ocr/_paddleocr.py +2 -1
- kreuzberg/_types.py +11 -10
- {kreuzberg-3.10.1.dist-info → kreuzberg-3.11.1.dist-info}/METADATA +13 -11
- {kreuzberg-3.10.1.dist-info → kreuzberg-3.11.1.dist-info}/RECORD +14 -14
- {kreuzberg-3.10.1.dist-info → kreuzberg-3.11.1.dist-info}/WHEEL +0 -0
- {kreuzberg-3.10.1.dist-info → kreuzberg-3.11.1.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.10.1.dist-info → kreuzberg-3.11.1.dist-info}/licenses/LICENSE +0 -0
kreuzberg/_config.py
CHANGED
@@ -97,19 +97,21 @@ def parse_ocr_backend_config(
|
|
97
97
|
if not isinstance(backend_config, dict):
|
98
98
|
return None
|
99
99
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
match backend:
|
101
|
+
case "tesseract":
|
102
|
+
# Convert psm integer to PSMMode enum if needed
|
103
|
+
processed_config = backend_config.copy()
|
104
|
+
if "psm" in processed_config and isinstance(processed_config["psm"], int):
|
105
|
+
from kreuzberg._ocr._tesseract import PSMMode # noqa: PLC0415
|
106
|
+
|
107
|
+
processed_config["psm"] = PSMMode(processed_config["psm"])
|
108
|
+
return TesseractConfig(**processed_config)
|
109
|
+
case "easyocr":
|
110
|
+
return EasyOCRConfig(**backend_config)
|
111
|
+
case "paddleocr":
|
112
|
+
return PaddleOCRConfig(**backend_config)
|
113
|
+
case _:
|
114
|
+
return None
|
113
115
|
|
114
116
|
|
115
117
|
def build_extraction_config_from_dict(config_dict: dict[str, Any]) -> ExtractionConfig:
|
@@ -140,7 +142,9 @@ def build_extraction_config_from_dict(config_dict: dict[str, Any]) -> Extraction
|
|
140
142
|
"document_classification_mode",
|
141
143
|
"keyword_count",
|
142
144
|
}
|
143
|
-
extraction_config
|
145
|
+
extraction_config = extraction_config | {
|
146
|
+
field: config_dict[field] for field in basic_fields if field in config_dict
|
147
|
+
}
|
144
148
|
|
145
149
|
# Handle OCR backend configuration
|
146
150
|
ocr_backend = extraction_config.get("ocr_backend")
|
@@ -62,7 +62,7 @@ def _get_translated_text(result: ExtractionResult) -> str:
|
|
62
62
|
from deep_translator import GoogleTranslator # noqa: PLC0415
|
63
63
|
except ImportError as e: # pragma: no cover
|
64
64
|
raise MissingDependencyError(
|
65
|
-
"The 'deep-translator' library is not installed. Please install it with: pip install 'kreuzberg[
|
65
|
+
"The 'deep-translator' library is not installed. Please install it with: pip install 'kreuzberg[document-classification]'"
|
66
66
|
) from e
|
67
67
|
|
68
68
|
try:
|
kreuzberg/_extractors/_base.py
CHANGED
@@ -116,8 +116,7 @@ class Extractor(ABC):
|
|
116
116
|
quality_score = calculate_quality_score(cleaned_content, dict(result.metadata) if result.metadata else None)
|
117
117
|
|
118
118
|
# Add quality metadata
|
119
|
-
enhanced_metadata = dict(result.metadata) if result.metadata else {}
|
120
|
-
enhanced_metadata["quality_score"] = quality_score
|
119
|
+
enhanced_metadata = (dict(result.metadata) if result.metadata else {}) | {"quality_score": quality_score}
|
121
120
|
|
122
121
|
# Return enhanced result
|
123
122
|
return ExtractionResult(
|
kreuzberg/_extractors/_image.py
CHANGED
@@ -85,23 +85,24 @@ class ImageExtractor(Extractor):
|
|
85
85
|
|
86
86
|
backend = get_ocr_backend(self.config.ocr_backend)
|
87
87
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
88
|
+
match self.config.ocr_backend:
|
89
|
+
case "tesseract":
|
90
|
+
config = (
|
91
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, TesseractConfig) else TesseractConfig()
|
92
|
+
)
|
93
|
+
result = backend.process_file_sync(path, **asdict(config))
|
94
|
+
case "paddleocr":
|
95
|
+
paddle_config = (
|
96
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, PaddleOCRConfig) else PaddleOCRConfig()
|
97
|
+
)
|
98
|
+
result = backend.process_file_sync(path, **asdict(paddle_config))
|
99
|
+
case "easyocr":
|
100
|
+
easy_config = (
|
101
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, EasyOCRConfig) else EasyOCRConfig()
|
102
|
+
)
|
103
|
+
result = backend.process_file_sync(path, **asdict(easy_config))
|
104
|
+
case _:
|
105
|
+
raise NotImplementedError(f"Sync OCR not implemented for {self.config.ocr_backend}")
|
105
106
|
return self._apply_quality_processing(result)
|
106
107
|
|
107
108
|
def _get_extension_from_mime_type(self, mime_type: str) -> str:
|
kreuzberg/_extractors/_pdf.py
CHANGED
@@ -88,14 +88,12 @@ class PDFExtractor(Extractor):
|
|
88
88
|
# Enhance metadata with table information
|
89
89
|
if result.tables:
|
90
90
|
table_summary = generate_table_summary(result.tables)
|
91
|
-
result.metadata.
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
}
|
98
|
-
)
|
91
|
+
result.metadata = result.metadata | {
|
92
|
+
"table_count": table_summary["table_count"],
|
93
|
+
"tables_summary": f"Document contains {table_summary['table_count']} tables "
|
94
|
+
f"across {table_summary['pages_with_tables']} pages with "
|
95
|
+
f"{table_summary['total_rows']} total rows",
|
96
|
+
}
|
99
97
|
|
100
98
|
return self._apply_quality_processing(result)
|
101
99
|
|
@@ -153,14 +151,12 @@ class PDFExtractor(Extractor):
|
|
153
151
|
# Enhance metadata with table information
|
154
152
|
if tables:
|
155
153
|
table_summary = generate_table_summary(tables)
|
156
|
-
result.metadata.
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
}
|
163
|
-
)
|
154
|
+
result.metadata = result.metadata | {
|
155
|
+
"table_count": table_summary["table_count"],
|
156
|
+
"tables_summary": f"Document contains {table_summary['table_count']} tables "
|
157
|
+
f"across {table_summary['pages_with_tables']} pages with "
|
158
|
+
f"{table_summary['total_rows']} total rows",
|
159
|
+
}
|
164
160
|
|
165
161
|
# Apply quality processing
|
166
162
|
return self._apply_quality_processing(result)
|
@@ -386,23 +382,24 @@ class PDFExtractor(Extractor):
|
|
386
382
|
backend = get_ocr_backend(self.config.ocr_backend)
|
387
383
|
paths = [Path(p) for p in image_paths]
|
388
384
|
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
385
|
+
match self.config.ocr_backend:
|
386
|
+
case "tesseract":
|
387
|
+
config = (
|
388
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, TesseractConfig) else TesseractConfig()
|
389
|
+
)
|
390
|
+
results = backend.process_batch_sync(paths, **asdict(config))
|
391
|
+
case "paddleocr":
|
392
|
+
paddle_config = (
|
393
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, PaddleOCRConfig) else PaddleOCRConfig()
|
394
|
+
)
|
395
|
+
results = backend.process_batch_sync(paths, **asdict(paddle_config))
|
396
|
+
case "easyocr":
|
397
|
+
easy_config = (
|
398
|
+
self.config.ocr_config if isinstance(self.config.ocr_config, EasyOCRConfig) else EasyOCRConfig()
|
399
|
+
)
|
400
|
+
results = backend.process_batch_sync(paths, **asdict(easy_config))
|
401
|
+
case _:
|
402
|
+
raise NotImplementedError(f"Sync OCR not implemented for {self.config.ocr_backend}")
|
406
403
|
|
407
404
|
# Use list comprehension and join for efficient string building
|
408
405
|
return "\n\n".join(result.content for result in results)
|
kreuzberg/_mcp/server.py
CHANGED
kreuzberg/_ocr/_easyocr.py
CHANGED
@@ -4,7 +4,6 @@ import warnings
|
|
4
4
|
from dataclasses import dataclass
|
5
5
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
6
6
|
|
7
|
-
import numpy as np
|
8
7
|
from PIL import Image
|
9
8
|
|
10
9
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
@@ -188,6 +187,9 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
188
187
|
|
189
188
|
kwargs.pop("language", None)
|
190
189
|
kwargs.pop("use_gpu", None)
|
190
|
+
kwargs.pop("device", None)
|
191
|
+
kwargs.pop("gpu_memory_limit", None)
|
192
|
+
kwargs.pop("fallback_to_cpu", None)
|
191
193
|
|
192
194
|
try:
|
193
195
|
result = await run_sync(
|
@@ -455,11 +457,16 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
|
|
455
457
|
Raises:
|
456
458
|
OCRError: If OCR processing fails.
|
457
459
|
"""
|
460
|
+
import numpy as np # noqa: PLC0415
|
461
|
+
|
458
462
|
self._init_easyocr_sync(**kwargs)
|
459
463
|
|
460
464
|
beam_width = kwargs.pop("beam_width")
|
461
465
|
kwargs.pop("language", None)
|
462
466
|
kwargs.pop("use_gpu", None)
|
467
|
+
kwargs.pop("device", None)
|
468
|
+
kwargs.pop("gpu_memory_limit", None)
|
469
|
+
kwargs.pop("fallback_to_cpu", None)
|
463
470
|
|
464
471
|
try:
|
465
472
|
result = self._reader.readtext(
|
kreuzberg/_ocr/_paddleocr.py
CHANGED
@@ -7,7 +7,6 @@ from importlib.util import find_spec
|
|
7
7
|
from pathlib import Path
|
8
8
|
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
|
9
9
|
|
10
|
-
import numpy as np
|
11
10
|
from PIL import Image
|
12
11
|
|
13
12
|
from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
|
@@ -380,6 +379,8 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
|
|
380
379
|
Raises:
|
381
380
|
OCRError: If OCR processing fails.
|
382
381
|
"""
|
382
|
+
import numpy as np # noqa: PLC0415
|
383
|
+
|
383
384
|
self._init_paddle_ocr_sync(**kwargs)
|
384
385
|
|
385
386
|
if image.mode != "RGB":
|
kreuzberg/_types.py
CHANGED
@@ -349,7 +349,7 @@ class ExtractionConfig:
|
|
349
349
|
"""Configuration for language detection. If None, uses default settings."""
|
350
350
|
spacy_entity_extraction_config: SpacyEntityExtractionConfig | None = None
|
351
351
|
"""Configuration for spaCy entity extraction. If None, uses default settings."""
|
352
|
-
auto_detect_document_type: bool =
|
352
|
+
auto_detect_document_type: bool = False
|
353
353
|
"""Whether to automatically detect the document type."""
|
354
354
|
document_type_confidence_threshold: float = 0.5
|
355
355
|
"""Confidence threshold for document type detection."""
|
@@ -398,15 +398,16 @@ class ExtractionConfig:
|
|
398
398
|
return asdict(self.ocr_config)
|
399
399
|
|
400
400
|
# Lazy load and cache default configs instead of creating new instances
|
401
|
-
|
402
|
-
|
401
|
+
match self.ocr_backend:
|
402
|
+
case "tesseract":
|
403
|
+
from kreuzberg._ocr._tesseract import TesseractConfig # noqa: PLC0415
|
403
404
|
|
404
|
-
|
405
|
-
|
406
|
-
|
405
|
+
return asdict(TesseractConfig())
|
406
|
+
case "easyocr":
|
407
|
+
from kreuzberg._ocr._easyocr import EasyOCRConfig # noqa: PLC0415
|
407
408
|
|
408
|
-
|
409
|
-
|
410
|
-
|
409
|
+
return asdict(EasyOCRConfig())
|
410
|
+
case _: # paddleocr or any other backend
|
411
|
+
from kreuzberg._ocr._paddleocr import PaddleOCRConfig # noqa: PLC0415
|
411
412
|
|
412
|
-
|
413
|
+
return asdict(PaddleOCRConfig())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.11.1
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -28,13 +28,13 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
28
28
|
Classifier: Topic :: Text Processing :: General
|
29
29
|
Classifier: Typing :: Typed
|
30
30
|
Requires-Python: >=3.10
|
31
|
-
Requires-Dist: anyio>=4.
|
31
|
+
Requires-Dist: anyio>=4.10.0
|
32
32
|
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
34
|
Requires-Dist: html-to-markdown[lxml]>=1.9.0
|
35
|
-
Requires-Dist: mcp>=1.12.
|
35
|
+
Requires-Dist: mcp>=1.12.4
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
|
-
Requires-Dist: playa-pdf>=0.
|
37
|
+
Requires-Dist: playa-pdf>=0.7.0
|
38
38
|
Requires-Dist: psutil>=7.0.0
|
39
39
|
Requires-Dist: pypdfium2==4.30.0
|
40
40
|
Requires-Dist: python-calamine>=0.3.2
|
@@ -45,25 +45,24 @@ Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
|
|
45
45
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
|
46
46
|
Provides-Extra: all
|
47
47
|
Requires-Dist: click>=8.2.1; extra == 'all'
|
48
|
+
Requires-Dist: deep-translator>=1.11.4; extra == 'all'
|
48
49
|
Requires-Dist: easyocr>=1.7.2; extra == 'all'
|
49
50
|
Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
|
50
51
|
Requires-Dist: gmft>=0.4.2; extra == 'all'
|
51
52
|
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
52
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.
|
53
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
|
53
54
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
54
55
|
Requires-Dist: paddleocr>=3.1.0; extra == 'all'
|
55
56
|
Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
|
56
|
-
Requires-Dist:
|
57
|
+
Requires-Dist: pandas>=2.3.1; extra == 'all'
|
58
|
+
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
|
57
59
|
Requires-Dist: rich>=14.1.0; extra == 'all'
|
58
60
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
59
61
|
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
60
62
|
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
61
63
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
|
62
64
|
Provides-Extra: api
|
63
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.
|
64
|
-
Provides-Extra: auto-classify-document-type
|
65
|
-
Requires-Dist: deep-translator>=1.11.4; extra == 'auto-classify-document-type'
|
66
|
-
Requires-Dist: pandas>=2.3.1; extra == 'auto-classify-document-type'
|
65
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
|
67
66
|
Provides-Extra: chunking
|
68
67
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
|
69
68
|
Provides-Extra: cli
|
@@ -71,7 +70,10 @@ Requires-Dist: click>=8.2.1; extra == 'cli'
|
|
71
70
|
Requires-Dist: rich>=14.1.0; extra == 'cli'
|
72
71
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
|
73
72
|
Provides-Extra: crypto
|
74
|
-
Requires-Dist: playa-pdf[crypto]>=0.
|
73
|
+
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
|
74
|
+
Provides-Extra: document-classification
|
75
|
+
Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
|
76
|
+
Requires-Dist: pandas>=2.3.1; extra == 'document-classification'
|
75
77
|
Provides-Extra: easyocr
|
76
78
|
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
77
79
|
Provides-Extra: entity-extraction
|
@@ -1,16 +1,16 @@
|
|
1
1
|
kreuzberg/__init__.py,sha256=0OJ_jNKbS6GxzWC5-EfRCiE80as_ya0-wwyNsTYbxzY,1721
|
2
2
|
kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
|
3
3
|
kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
|
4
|
-
kreuzberg/_config.py,sha256=
|
4
|
+
kreuzberg/_config.py,sha256=Au521UiR7vcQs_8_hhoWIfmDDMJIrDM3XZUB_qHfCmo,14035
|
5
5
|
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_document_classification.py,sha256=
|
6
|
+
kreuzberg/_document_classification.py,sha256=qFGmwvUMhnNAvNNJO7E-huPx-Ps-_DWxdNxsozIzgaw,6870
|
7
7
|
kreuzberg/_entity_extraction.py,sha256=Oa1T-9mptimpOHtcda-GtrVYH9PFy7DSJj3thJZUD7k,7902
|
8
8
|
kreuzberg/_gmft.py,sha256=HdQ7Xpuixxl2Y0jY8C3KfyQEU0mN4yQdqErWCv4TnFY,25573
|
9
9
|
kreuzberg/_language_detection.py,sha256=_Ng2aHgPxOHFgd507gVNiIGVmnxxbpgYwsO0bD0yTzg,3315
|
10
10
|
kreuzberg/_mime_types.py,sha256=2warRVqfBUNIg8JBg8yP4pRqaMPvwINosHMkJwtH_Fc,8488
|
11
11
|
kreuzberg/_playa.py,sha256=_IPrUSWwSfDQlWXOpKlauV0D9MhGrujGP5kmQ0U3L0g,12188
|
12
12
|
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
13
|
-
kreuzberg/_types.py,sha256=
|
13
|
+
kreuzberg/_types.py,sha256=bMaU6VuoqwOpW6ufshA-DWpNw6t9EokjEDEfFsznvdo,15389
|
14
14
|
kreuzberg/cli.py,sha256=rJMdHg7FhUxefCrx-sf4c2qVGRXr8Xrpjgfx_DQSKMg,12558
|
15
15
|
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
16
|
kreuzberg/extraction.py,sha256=Kt1mOxdlOb35yVOdpdhiRPuTgA9BW_TTG9qwCkSxSkc,17332
|
@@ -18,21 +18,21 @@ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
kreuzberg/_api/main.py,sha256=8VwxRlIXwnPs7ZYm0saUZsNOjevEAWJQpNreG-X7ZpE,3273
|
20
20
|
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
kreuzberg/_extractors/_base.py,sha256=
|
21
|
+
kreuzberg/_extractors/_base.py,sha256=H_nwynBX3fozncVjV13c329x5eCLl5r7nyVTLQyDAzI,4396
|
22
22
|
kreuzberg/_extractors/_email.py,sha256=Jpr4NFef640uVgNFkR1or-omy8RVt-NOHUYgWRDjyBo,6753
|
23
23
|
kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
|
24
|
-
kreuzberg/_extractors/_image.py,sha256=
|
24
|
+
kreuzberg/_extractors/_image.py,sha256=Iz1JpvGqcYyh9g4zO_bMZG3E9S39KNHFu8PrXDRXeOk,4513
|
25
25
|
kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
|
26
|
-
kreuzberg/_extractors/_pdf.py,sha256=
|
26
|
+
kreuzberg/_extractors/_pdf.py,sha256=OflyvwEkuFLmw8E3si35MCGH31fvd5o50VdMmu5QRVs,19884
|
27
27
|
kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
|
28
28
|
kreuzberg/_extractors/_spread_sheet.py,sha256=iagiyJsnl-89OP1eqmEv8jWl7gZBJm2x0YOyqBgLasA,13733
|
29
29
|
kreuzberg/_extractors/_structured.py,sha256=PbNaXd-_PUPsE0yZkISod_vLBokbWdVTKEPpEmqaEMM,5787
|
30
30
|
kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
|
31
|
-
kreuzberg/_mcp/server.py,sha256=
|
31
|
+
kreuzberg/_mcp/server.py,sha256=Dxed80MqZsYCFyYo0QdArpKE4H8DhpKY34fijdzV5uw,8731
|
32
32
|
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
33
|
kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
|
34
|
-
kreuzberg/_ocr/_easyocr.py,sha256=
|
35
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=
|
34
|
+
kreuzberg/_ocr/_easyocr.py,sha256=eU4MA_B_-cvq_IhpCeYUruL_kqcfm8maNZKP7zvVQHI,17512
|
35
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=I7ns6L56a2Ol460Bge6e0hpc2AkkwDepLcpCsABj5Dc,17609
|
36
36
|
kreuzberg/_ocr/_tesseract.py,sha256=teLMH1pBhpcmEXDcyZlv56hYINLGMuaKZ0CQtcu_czQ,31510
|
37
37
|
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
38
|
kreuzberg/_utils/_cache.py,sha256=hYd_a5Ni5VJBE1XU_eN9gvQ5gg0FRsdbRgmJe-OIJHM,15253
|
@@ -47,8 +47,8 @@ kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6
|
|
47
47
|
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
48
48
|
kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
|
49
49
|
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
50
|
-
kreuzberg-3.
|
51
|
-
kreuzberg-3.
|
52
|
-
kreuzberg-3.
|
53
|
-
kreuzberg-3.
|
54
|
-
kreuzberg-3.
|
50
|
+
kreuzberg-3.11.1.dist-info/METADATA,sha256=4b51JDwqoS-gjitz5PEpOlDxZ1-lO2C3BR5X2pec4g0,12136
|
51
|
+
kreuzberg-3.11.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
+
kreuzberg-3.11.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
+
kreuzberg-3.11.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
+
kreuzberg-3.11.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|