kreuzberg 3.14.0__py3-none-any.whl → 3.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/__init__.py +6 -0
- kreuzberg/_api/_config_cache.py +247 -0
- kreuzberg/_api/main.py +156 -30
- kreuzberg/_chunker.py +7 -6
- kreuzberg/_constants.py +2 -0
- kreuzberg/_document_classification.py +4 -6
- kreuzberg/_entity_extraction.py +9 -4
- kreuzberg/_extractors/_base.py +269 -3
- kreuzberg/_extractors/_email.py +95 -27
- kreuzberg/_extractors/_html.py +85 -7
- kreuzberg/_extractors/_image.py +23 -22
- kreuzberg/_extractors/_pandoc.py +106 -75
- kreuzberg/_extractors/_pdf.py +209 -99
- kreuzberg/_extractors/_presentation.py +72 -8
- kreuzberg/_extractors/_spread_sheet.py +25 -30
- kreuzberg/_mcp/server.py +345 -25
- kreuzberg/_mime_types.py +42 -0
- kreuzberg/_ocr/_easyocr.py +2 -2
- kreuzberg/_ocr/_paddleocr.py +1 -1
- kreuzberg/_ocr/_tesseract.py +74 -34
- kreuzberg/_types.py +182 -23
- kreuzberg/_utils/_cache.py +10 -4
- kreuzberg/_utils/_device.py +2 -4
- kreuzberg/_utils/_image_preprocessing.py +12 -39
- kreuzberg/_utils/_process_pool.py +29 -8
- kreuzberg/_utils/_quality.py +7 -2
- kreuzberg/_utils/_resource_managers.py +65 -0
- kreuzberg/_utils/_sync.py +36 -6
- kreuzberg/_utils/_tmp.py +37 -1
- kreuzberg/cli.py +34 -20
- kreuzberg/extraction.py +43 -27
- {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/METADATA +2 -1
- kreuzberg-3.15.0.dist-info/RECORD +60 -0
- kreuzberg-3.14.0.dist-info/RECORD +0 -58
- {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/WHEEL +0 -0
- {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg/extraction.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import multiprocessing as mp
|
4
4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING,
|
6
|
+
from typing import TYPE_CHECKING, Final, cast
|
7
7
|
|
8
8
|
import anyio
|
9
9
|
|
@@ -30,6 +30,31 @@ if TYPE_CHECKING:
|
|
30
30
|
DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
|
31
31
|
|
32
32
|
|
33
|
+
async def _handle_cache_async(path: Path, config: ExtractionConfig) -> ExtractionResult | None:
|
34
|
+
"""Handle cache lookup and coordination with other processes.
|
35
|
+
|
36
|
+
Args:
|
37
|
+
path: Path to the file being processed
|
38
|
+
config: Extraction configuration
|
39
|
+
|
40
|
+
Returns:
|
41
|
+
Cached result if available, None otherwise
|
42
|
+
"""
|
43
|
+
cache = get_document_cache()
|
44
|
+
|
45
|
+
cached_result = cache.get(path, config)
|
46
|
+
if cached_result is not None:
|
47
|
+
return cached_result
|
48
|
+
|
49
|
+
if cache.is_processing(path, config):
|
50
|
+
event = cache.mark_processing(path, config)
|
51
|
+
await anyio.to_thread.run_sync(event.wait) # pragma: no cover
|
52
|
+
|
53
|
+
return cache.get(path, config) # pragma: no cover
|
54
|
+
|
55
|
+
return None
|
56
|
+
|
57
|
+
|
33
58
|
def _validate_and_post_process_helper(
|
34
59
|
result: ExtractionResult, config: ExtractionConfig, file_path: Path | None = None
|
35
60
|
) -> ExtractionResult:
|
@@ -102,9 +127,9 @@ def _handle_chunk_content(
|
|
102
127
|
mime_type: str,
|
103
128
|
config: ExtractionConfig,
|
104
129
|
content: str,
|
105
|
-
) ->
|
130
|
+
) -> list[str]:
|
106
131
|
chunker = get_chunker(mime_type=mime_type, max_characters=config.max_chars, overlap_characters=config.max_overlap)
|
107
|
-
return chunker.chunks(content)
|
132
|
+
return list(chunker.chunks(content))
|
108
133
|
|
109
134
|
|
110
135
|
async def extract_bytes(content: bytes, mime_type: str, config: ExtractionConfig = DEFAULT_CONFIG) -> ExtractionResult:
|
@@ -153,19 +178,9 @@ async def extract_file(
|
|
153
178
|
path = Path(file_path)
|
154
179
|
|
155
180
|
if config.use_cache:
|
156
|
-
cached_result = cache.get(path, config)
|
181
|
+
cached_result = await _handle_cache_async(path, config)
|
157
182
|
if cached_result is not None:
|
158
183
|
return cached_result
|
159
|
-
|
160
|
-
if cache.is_processing(path, config):
|
161
|
-
event = cache.mark_processing(path, config)
|
162
|
-
await anyio.to_thread.run_sync(event.wait) # pragma: no cover
|
163
|
-
|
164
|
-
# Try cache again after waiting for other process to complete # ~keep
|
165
|
-
cached_result = cache.get(path, config) # pragma: no cover
|
166
|
-
if cached_result is not None: # pragma: no cover
|
167
|
-
return cached_result
|
168
|
-
|
169
184
|
cache.mark_processing(path, config)
|
170
185
|
|
171
186
|
try:
|
@@ -227,11 +242,11 @@ async def batch_extract_file(
|
|
227
242
|
error_result = ExtractionResult(
|
228
243
|
content=f"Error: {type(e).__name__}: {e!s}",
|
229
244
|
mime_type="text/plain",
|
230
|
-
metadata={
|
245
|
+
metadata={
|
231
246
|
"error": f"{type(e).__name__}: {e!s}",
|
232
247
|
"error_context": create_error_context(
|
233
248
|
operation="batch_extract_file",
|
234
|
-
file_path=path,
|
249
|
+
file_path=str(path),
|
235
250
|
error=e,
|
236
251
|
index=index,
|
237
252
|
),
|
@@ -276,7 +291,7 @@ async def batch_extract_bytes(
|
|
276
291
|
error_result = ExtractionResult(
|
277
292
|
content=f"Error: {type(e).__name__}: {e!s}",
|
278
293
|
mime_type="text/plain",
|
279
|
-
metadata={
|
294
|
+
metadata={
|
280
295
|
"error": f"{type(e).__name__}: {e!s}",
|
281
296
|
"error_context": create_error_context(
|
282
297
|
operation="batch_extract_bytes",
|
@@ -400,31 +415,31 @@ def batch_extract_file_sync(
|
|
400
415
|
|
401
416
|
max_workers = min(len(file_paths), mp.cpu_count())
|
402
417
|
|
403
|
-
def extract_single(file_path: PathLike[str] | str) -> tuple[int, ExtractionResult]:
|
418
|
+
def extract_single(index: int, file_path: PathLike[str] | str) -> tuple[int, ExtractionResult]:
|
404
419
|
"""Extract single file with index for ordering."""
|
405
420
|
try:
|
406
421
|
return (
|
407
|
-
|
422
|
+
index,
|
408
423
|
extract_file_sync(file_path=Path(file_path), mime_type=None, config=config),
|
409
424
|
)
|
410
425
|
except Exception as e: # noqa: BLE001
|
411
426
|
error_result = ExtractionResult(
|
412
427
|
content=f"Error: {type(e).__name__}: {e!s}",
|
413
428
|
mime_type="text/plain",
|
414
|
-
metadata={
|
429
|
+
metadata={
|
415
430
|
"error": f"{type(e).__name__}: {e!s}",
|
416
431
|
"error_context": create_error_context(
|
417
432
|
operation="batch_extract_file_sync",
|
418
|
-
file_path=file_path,
|
433
|
+
file_path=str(file_path),
|
419
434
|
error=e,
|
420
435
|
),
|
421
436
|
},
|
422
437
|
chunks=[],
|
423
438
|
)
|
424
|
-
return (
|
439
|
+
return (index, error_result)
|
425
440
|
|
426
441
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
427
|
-
future_to_index = {executor.submit(extract_single, fp): i for i, fp in enumerate(file_paths)}
|
442
|
+
future_to_index = {executor.submit(extract_single, i, fp): i for i, fp in enumerate(file_paths)}
|
428
443
|
|
429
444
|
results: list[ExtractionResult | None] = [None] * len(file_paths)
|
430
445
|
for future in as_completed(future_to_index):
|
@@ -453,16 +468,15 @@ def batch_extract_bytes_sync(
|
|
453
468
|
|
454
469
|
max_workers = min(len(contents), mp.cpu_count())
|
455
470
|
|
456
|
-
def extract_single(
|
471
|
+
def extract_single(index: int, content: bytes, mime_type: str) -> tuple[int, ExtractionResult]:
|
457
472
|
"""Extract single content with index for ordering."""
|
458
|
-
index, (content, mime_type) = index_and_content
|
459
473
|
try:
|
460
474
|
return (index, extract_bytes_sync(content=content, mime_type=mime_type, config=config))
|
461
475
|
except Exception as e: # noqa: BLE001
|
462
476
|
error_result = ExtractionResult(
|
463
477
|
content=f"Error: {type(e).__name__}: {e!s}",
|
464
478
|
mime_type="text/plain",
|
465
|
-
metadata={
|
479
|
+
metadata={
|
466
480
|
"error": f"{type(e).__name__}: {e!s}",
|
467
481
|
"error_context": create_error_context(
|
468
482
|
operation="batch_extract_bytes_sync",
|
@@ -477,7 +491,9 @@ def batch_extract_bytes_sync(
|
|
477
491
|
return (index, error_result)
|
478
492
|
|
479
493
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
480
|
-
future_to_index = {
|
494
|
+
future_to_index = {
|
495
|
+
executor.submit(extract_single, i, content, mime_type): i for i, (content, mime_type) in enumerate(contents)
|
496
|
+
}
|
481
497
|
|
482
498
|
results: list[ExtractionResult | None] = [None] * len(contents)
|
483
499
|
for future in as_completed(future_to_index):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.14.0
|
3
|
+
Version: 3.15.0
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -107,6 +107,7 @@ Description-Content-Type: text/markdown
|
|
107
107
|
### Document Intelligence Capabilities
|
108
108
|
|
109
109
|
- **Text Extraction**: High-fidelity text extraction preserving document structure and formatting
|
110
|
+
- **Image Extraction**: Extract embedded images from PDFs, presentations, HTML, and Office documents with optional OCR
|
110
111
|
- **Metadata Extraction**: Comprehensive metadata including author, creation date, language, and document properties
|
111
112
|
- **Format Support**: 18 document types including PDF, Microsoft Office, images, HTML, and structured data formats
|
112
113
|
- **OCR Integration**: Tesseract OCR with markdown output (default) and table extraction from scanned documents
|
@@ -0,0 +1,60 @@
|
|
1
|
+
kreuzberg/__init__.py,sha256=-IHDHXKE7q43MBr_KklpqvhNPjJRhX3qFpMge8kuViE,1467
|
2
|
+
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
+
kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
|
4
|
+
kreuzberg/_config.py,sha256=2LI5z9gXniqO4afrMmbZfMdhlT2701O5OlGKkrMo-bM,12385
|
5
|
+
kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
|
6
|
+
kreuzberg/_document_classification.py,sha256=zgBjqiHCqhtz74JLtt_V8kk6HQTkK5egGWdAGk9dOEQ,5672
|
7
|
+
kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
|
8
|
+
kreuzberg/_gmft.py,sha256=a7KDXbZM0PxyFpAIjM0xMRvxzoMo4fTQuGlFNa8uXBU,20502
|
9
|
+
kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
|
10
|
+
kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
|
11
|
+
kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
|
12
|
+
kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
|
13
|
+
kreuzberg/_types.py,sha256=7hj2KWohuSKQ9cJd_VCuSeciuyuOC5MdSkS1s5QaPOg,44870
|
14
|
+
kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
|
15
|
+
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
+
kreuzberg/extraction.py,sha256=gDkwuj_omQ8OCx4RALD0NjasxMhZLhIju7odK7wMwDM,17789
|
17
|
+
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
|
20
|
+
kreuzberg/_api/main.py,sha256=_r2R_-4zBkyJBn0bcPWogVEDICxWWt5_FFiQIF-r4N4,15463
|
21
|
+
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
+
kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
|
23
|
+
kreuzberg/_extractors/_email.py,sha256=8tsHycVBQ2KSSqp2TZ9a0O1Yxjwe0YvE2GVxUajCVz4,8478
|
24
|
+
kreuzberg/_extractors/_html.py,sha256=7fzNr7-BJ4IND7PWTlEIiqfeKDUb_ZjWO3KDdU3umgI,5151
|
25
|
+
kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
|
26
|
+
kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
|
27
|
+
kreuzberg/_extractors/_pdf.py,sha256=MKfihJcveulfkMQc-s5VUCgvK1aw8EyCbUMRwJo_KoM,23225
|
28
|
+
kreuzberg/_extractors/_presentation.py,sha256=MZd4Ft2g5oIrEZ1h3ZWsQTW_VpHI2yi4g4Tdh5iw_7I,10466
|
29
|
+
kreuzberg/_extractors/_spread_sheet.py,sha256=Q2uXvotwqvWiYkIPrtnVL2Ci9ZA7fmTgN6tDN_huwdE,12801
|
30
|
+
kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
|
31
|
+
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
32
|
+
kreuzberg/_mcp/server.py,sha256=n_bfNPSU_SvXVJ5z05oKVj2sFv2uRYoe3ZZzyVOHQOI,17608
|
33
|
+
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
34
|
+
kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
|
35
|
+
kreuzberg/_ocr/_easyocr.py,sha256=6Naqy9JvL96Mm9gw4s-4nRsubd0Z0t8Zn6VC_HInUfc,14577
|
36
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
|
37
|
+
kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
|
38
|
+
kreuzberg/_ocr/_tesseract.py,sha256=fq4qdrzPss9ZaIneUxmwq9x3sFJe8FEi__DLOa1AXN4,50945
|
39
|
+
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
+
kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
|
41
|
+
kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
|
42
|
+
kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
|
43
|
+
kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
|
44
|
+
kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
|
45
|
+
kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
|
46
|
+
kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
|
47
|
+
kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
|
48
|
+
kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
|
49
|
+
kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
|
50
|
+
kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
|
51
|
+
kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
|
52
|
+
kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
|
53
|
+
kreuzberg/_utils/_sync.py,sha256=O4ukJfo8hIr72kaoRvvJjbkBeorIw0SUfkovv0YXa7k,3170
|
54
|
+
kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
|
55
|
+
kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
|
56
|
+
kreuzberg-3.15.0.dist-info/METADATA,sha256=-4oGIVQAYBB8BSPbwA_MA1LK-ZROaCxwX6g-re4ZtCQ,12246
|
57
|
+
kreuzberg-3.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
58
|
+
kreuzberg-3.15.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
59
|
+
kreuzberg-3.15.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
60
|
+
kreuzberg-3.15.0.dist-info/RECORD,,
|
@@ -1,58 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
|
2
|
-
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
-
kreuzberg/_chunker.py,sha256=tr9_KUYTSLauFois3MsB-A-0hGcTT8hTQFrqNRTii-I,1373
|
4
|
-
kreuzberg/_config.py,sha256=2LI5z9gXniqO4afrMmbZfMdhlT2701O5OlGKkrMo-bM,12385
|
5
|
-
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_document_classification.py,sha256=Mz_s2GJGsEl7MQ-67BPoGYCZibTy9Sw0PScUZKBjKOA,5736
|
7
|
-
kreuzberg/_entity_extraction.py,sha256=5YpPnqoJ5aiHd_sy4bN4-Ngiq79RhCV6yaUQE8joGXo,3503
|
8
|
-
kreuzberg/_gmft.py,sha256=a7KDXbZM0PxyFpAIjM0xMRvxzoMo4fTQuGlFNa8uXBU,20502
|
9
|
-
kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
|
10
|
-
kreuzberg/_mime_types.py,sha256=kGBDSMO4XPgzUKC7iaBeChCtRQXZ9_zXq6eJydejX_k,7739
|
11
|
-
kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
|
12
|
-
kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
|
13
|
-
kreuzberg/_types.py,sha256=yw8ZzCgwp8T4byh00gdSlABDtRwro6H1pemQsO5IZMQ,39132
|
14
|
-
kreuzberg/cli.py,sha256=Ob0IfqWcaiM09pFdC6wTpdSeql0SGZDxBxfrEhJAGmo,13501
|
15
|
-
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
-
kreuzberg/extraction.py,sha256=qT-Ziw5FmMqcPT88VrglikL1RASSJCf5W7xP6L9Vi5s,17673
|
17
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
kreuzberg/_api/main.py,sha256=bZLaQpW8eoTFGvCGJgFodALy4rDfe9kuY1oj9OKPQpU,10792
|
20
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
kreuzberg/_extractors/_base.py,sha256=i2FvAhRnamEtBb4a-C7pfcdWIXnkEBw0saMQu7h1_RQ,2069
|
22
|
-
kreuzberg/_extractors/_email.py,sha256=jn_8J4BASKJ7zFHBG0PgxNe3OT4pjmEM2tTKX8y_0AE,5887
|
23
|
-
kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
|
24
|
-
kreuzberg/_extractors/_image.py,sha256=lFPoxAf7_Zbx-1t8W4vU2bhHauiNGOAFbZxr_2gNUsw,3991
|
25
|
-
kreuzberg/_extractors/_pandoc.py,sha256=-Ai4S1cXs7F6yeonb_7Y7_ZoWHn29E2oP1WlPtM-4HM,22505
|
26
|
-
kreuzberg/_extractors/_pdf.py,sha256=naJ_AgtAgtGIjAqiU4_G7lgftKWhUjZDLVILSG2AyVc,18757
|
27
|
-
kreuzberg/_extractors/_presentation.py,sha256=ULGkt7dzeA9sYSEhpAucKZmkdv9EubzeZtOjoLP3Z2E,6994
|
28
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=eBAx_OwoyRqMzmD4Z07UlOBwcXckymgvj_0o7di6thA,12715
|
29
|
-
kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
|
30
|
-
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
31
|
-
kreuzberg/_mcp/server.py,sha256=YPMJp6xnZ3DC32NEdX5Gqf3vwxsHZxXxUxZ6jghpv6I,5688
|
32
|
-
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
|
-
kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
|
34
|
-
kreuzberg/_ocr/_easyocr.py,sha256=XbgpGt5tkE4xHleIGvV1cHlpOQTp43rSXBO1CyIyKTg,14599
|
35
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=hfc6Zi2eSUYTVVF9y9D1P2_pLiLXPfFRoJ6QDJ6oZag,15017
|
36
|
-
kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
|
37
|
-
kreuzberg/_ocr/_tesseract.py,sha256=QEKK_PDZnNiZRgpklOgMXB-cObJy6C-HuxL6Gza5Z3c,49136
|
38
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
|
-
kreuzberg/_utils/_cache.py,sha256=qeyI6rJOQlKtdHjJeOjUxx31eItak_drrNn8Cf8HbN8,13956
|
40
|
-
kreuzberg/_utils/_device.py,sha256=UxGkSTN3Up-Zn43CSyvf8CozW2xAF05Cm01LWA2FZmg,8263
|
41
|
-
kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
|
42
|
-
kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
|
43
|
-
kreuzberg/_utils/_image_preprocessing.py,sha256=2u0A28M07F9XlYebTG5salOUVEE3YT3m8fiR8Z2ZM8E,12326
|
44
|
-
kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
|
45
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
|
46
|
-
kreuzberg/_utils/_process_pool.py,sha256=7p8Co1w-Tvh2MUdxMcPMpvOikumrb0nN2ApQVytV-_c,6726
|
47
|
-
kreuzberg/_utils/_quality.py,sha256=f7NbyZysyJQD8jKCNWhogvluU9A7GdEYhMsDBeMbGAA,5412
|
48
|
-
kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
|
49
|
-
kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
|
50
|
-
kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
|
51
|
-
kreuzberg/_utils/_sync.py,sha256=OWiciXPTGHIxgiGoHI2AglZ1siTNT-nU_JCgHPNzzHk,2196
|
52
|
-
kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
|
53
|
-
kreuzberg/_utils/_tmp.py,sha256=wnOInBkcuQoxI1vBLvNv9NqbRCEu9Y03qfOjqQuAk3s,841
|
54
|
-
kreuzberg-3.14.0.dist-info/METADATA,sha256=68rRivXnf8n_F9lqekOydDOd8sehWpHpbbKzRup7XDc,12127
|
55
|
-
kreuzberg-3.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
56
|
-
kreuzberg-3.14.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
57
|
-
kreuzberg-3.14.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
58
|
-
kreuzberg-3.14.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|