kreuzberg 3.14.0__py3-none-any.whl → 3.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kreuzberg/__init__.py +6 -0
  2. kreuzberg/_api/_config_cache.py +247 -0
  3. kreuzberg/_api/main.py +156 -30
  4. kreuzberg/_chunker.py +7 -6
  5. kreuzberg/_constants.py +2 -0
  6. kreuzberg/_document_classification.py +4 -6
  7. kreuzberg/_entity_extraction.py +9 -4
  8. kreuzberg/_extractors/_base.py +269 -3
  9. kreuzberg/_extractors/_email.py +95 -27
  10. kreuzberg/_extractors/_html.py +85 -7
  11. kreuzberg/_extractors/_image.py +23 -22
  12. kreuzberg/_extractors/_pandoc.py +106 -75
  13. kreuzberg/_extractors/_pdf.py +209 -99
  14. kreuzberg/_extractors/_presentation.py +72 -8
  15. kreuzberg/_extractors/_spread_sheet.py +25 -30
  16. kreuzberg/_mcp/server.py +345 -25
  17. kreuzberg/_mime_types.py +42 -0
  18. kreuzberg/_ocr/_easyocr.py +2 -2
  19. kreuzberg/_ocr/_paddleocr.py +1 -1
  20. kreuzberg/_ocr/_tesseract.py +74 -34
  21. kreuzberg/_types.py +182 -23
  22. kreuzberg/_utils/_cache.py +10 -4
  23. kreuzberg/_utils/_device.py +2 -4
  24. kreuzberg/_utils/_image_preprocessing.py +12 -39
  25. kreuzberg/_utils/_process_pool.py +29 -8
  26. kreuzberg/_utils/_quality.py +7 -2
  27. kreuzberg/_utils/_resource_managers.py +65 -0
  28. kreuzberg/_utils/_sync.py +36 -6
  29. kreuzberg/_utils/_tmp.py +37 -1
  30. kreuzberg/cli.py +34 -20
  31. kreuzberg/extraction.py +43 -27
  32. {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/METADATA +2 -1
  33. kreuzberg-3.15.0.dist-info/RECORD +60 -0
  34. kreuzberg-3.14.0.dist-info/RECORD +0 -58
  35. {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/WHEEL +0 -0
  36. {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/entry_points.txt +0 -0
  37. {kreuzberg-3.14.0.dist-info → kreuzberg-3.15.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg/extraction.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import multiprocessing as mp
4
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Final, cast
6
+ from typing import TYPE_CHECKING, Final, cast
7
7
 
8
8
  import anyio
9
9
 
@@ -30,6 +30,31 @@ if TYPE_CHECKING:
30
30
  DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
31
31
 
32
32
 
33
+ async def _handle_cache_async(path: Path, config: ExtractionConfig) -> ExtractionResult | None:
34
+ """Handle cache lookup and coordination with other processes.
35
+
36
+ Args:
37
+ path: Path to the file being processed
38
+ config: Extraction configuration
39
+
40
+ Returns:
41
+ Cached result if available, None otherwise
42
+ """
43
+ cache = get_document_cache()
44
+
45
+ cached_result = cache.get(path, config)
46
+ if cached_result is not None:
47
+ return cached_result
48
+
49
+ if cache.is_processing(path, config):
50
+ event = cache.mark_processing(path, config)
51
+ await anyio.to_thread.run_sync(event.wait) # pragma: no cover
52
+
53
+ return cache.get(path, config) # pragma: no cover
54
+
55
+ return None
56
+
57
+
33
58
  def _validate_and_post_process_helper(
34
59
  result: ExtractionResult, config: ExtractionConfig, file_path: Path | None = None
35
60
  ) -> ExtractionResult:
@@ -102,9 +127,9 @@ def _handle_chunk_content(
102
127
  mime_type: str,
103
128
  config: ExtractionConfig,
104
129
  content: str,
105
- ) -> Any:
130
+ ) -> list[str]:
106
131
  chunker = get_chunker(mime_type=mime_type, max_characters=config.max_chars, overlap_characters=config.max_overlap)
107
- return chunker.chunks(content)
132
+ return list(chunker.chunks(content))
108
133
 
109
134
 
110
135
  async def extract_bytes(content: bytes, mime_type: str, config: ExtractionConfig = DEFAULT_CONFIG) -> ExtractionResult:
@@ -153,19 +178,9 @@ async def extract_file(
153
178
  path = Path(file_path)
154
179
 
155
180
  if config.use_cache:
156
- cached_result = cache.get(path, config)
181
+ cached_result = await _handle_cache_async(path, config)
157
182
  if cached_result is not None:
158
183
  return cached_result
159
-
160
- if cache.is_processing(path, config):
161
- event = cache.mark_processing(path, config)
162
- await anyio.to_thread.run_sync(event.wait) # pragma: no cover
163
-
164
- # Try cache again after waiting for other process to complete # ~keep
165
- cached_result = cache.get(path, config) # pragma: no cover
166
- if cached_result is not None: # pragma: no cover
167
- return cached_result
168
-
169
184
  cache.mark_processing(path, config)
170
185
 
171
186
  try:
@@ -227,11 +242,11 @@ async def batch_extract_file(
227
242
  error_result = ExtractionResult(
228
243
  content=f"Error: {type(e).__name__}: {e!s}",
229
244
  mime_type="text/plain",
230
- metadata={ # type: ignore[typeddict-unknown-key]
245
+ metadata={
231
246
  "error": f"{type(e).__name__}: {e!s}",
232
247
  "error_context": create_error_context(
233
248
  operation="batch_extract_file",
234
- file_path=path,
249
+ file_path=str(path),
235
250
  error=e,
236
251
  index=index,
237
252
  ),
@@ -276,7 +291,7 @@ async def batch_extract_bytes(
276
291
  error_result = ExtractionResult(
277
292
  content=f"Error: {type(e).__name__}: {e!s}",
278
293
  mime_type="text/plain",
279
- metadata={ # type: ignore[typeddict-unknown-key]
294
+ metadata={
280
295
  "error": f"{type(e).__name__}: {e!s}",
281
296
  "error_context": create_error_context(
282
297
  operation="batch_extract_bytes",
@@ -400,31 +415,31 @@ def batch_extract_file_sync(
400
415
 
401
416
  max_workers = min(len(file_paths), mp.cpu_count())
402
417
 
403
- def extract_single(file_path: PathLike[str] | str) -> tuple[int, ExtractionResult]:
418
+ def extract_single(index: int, file_path: PathLike[str] | str) -> tuple[int, ExtractionResult]:
404
419
  """Extract single file with index for ordering."""
405
420
  try:
406
421
  return (
407
- file_paths.index(file_path),
422
+ index,
408
423
  extract_file_sync(file_path=Path(file_path), mime_type=None, config=config),
409
424
  )
410
425
  except Exception as e: # noqa: BLE001
411
426
  error_result = ExtractionResult(
412
427
  content=f"Error: {type(e).__name__}: {e!s}",
413
428
  mime_type="text/plain",
414
- metadata={ # type: ignore[typeddict-unknown-key]
429
+ metadata={
415
430
  "error": f"{type(e).__name__}: {e!s}",
416
431
  "error_context": create_error_context(
417
432
  operation="batch_extract_file_sync",
418
- file_path=file_path,
433
+ file_path=str(file_path),
419
434
  error=e,
420
435
  ),
421
436
  },
422
437
  chunks=[],
423
438
  )
424
- return (file_paths.index(file_path), error_result)
439
+ return (index, error_result)
425
440
 
426
441
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
427
- future_to_index = {executor.submit(extract_single, fp): i for i, fp in enumerate(file_paths)}
442
+ future_to_index = {executor.submit(extract_single, i, fp): i for i, fp in enumerate(file_paths)}
428
443
 
429
444
  results: list[ExtractionResult | None] = [None] * len(file_paths)
430
445
  for future in as_completed(future_to_index):
@@ -453,16 +468,15 @@ def batch_extract_bytes_sync(
453
468
 
454
469
  max_workers = min(len(contents), mp.cpu_count())
455
470
 
456
- def extract_single(index_and_content: tuple[int, tuple[bytes, str]]) -> tuple[int, ExtractionResult]:
471
+ def extract_single(index: int, content: bytes, mime_type: str) -> tuple[int, ExtractionResult]:
457
472
  """Extract single content with index for ordering."""
458
- index, (content, mime_type) = index_and_content
459
473
  try:
460
474
  return (index, extract_bytes_sync(content=content, mime_type=mime_type, config=config))
461
475
  except Exception as e: # noqa: BLE001
462
476
  error_result = ExtractionResult(
463
477
  content=f"Error: {type(e).__name__}: {e!s}",
464
478
  mime_type="text/plain",
465
- metadata={ # type: ignore[typeddict-unknown-key]
479
+ metadata={
466
480
  "error": f"{type(e).__name__}: {e!s}",
467
481
  "error_context": create_error_context(
468
482
  operation="batch_extract_bytes_sync",
@@ -477,7 +491,9 @@ def batch_extract_bytes_sync(
477
491
  return (index, error_result)
478
492
 
479
493
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
480
- future_to_index = {executor.submit(extract_single, (i, content)): i for i, content in enumerate(contents)}
494
+ future_to_index = {
495
+ executor.submit(extract_single, i, content, mime_type): i for i, (content, mime_type) in enumerate(contents)
496
+ }
481
497
 
482
498
  results: list[ExtractionResult | None] = [None] * len(contents)
483
499
  for future in as_completed(future_to_index):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.14.0
3
+ Version: 3.15.0
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -107,6 +107,7 @@ Description-Content-Type: text/markdown
107
107
  ### Document Intelligence Capabilities
108
108
 
109
109
  - **Text Extraction**: High-fidelity text extraction preserving document structure and formatting
110
+ - **Image Extraction**: Extract embedded images from PDFs, presentations, HTML, and Office documents with optional OCR
110
111
  - **Metadata Extraction**: Comprehensive metadata including author, creation date, language, and document properties
111
112
  - **Format Support**: 18 document types including PDF, Microsoft Office, images, HTML, and structured data formats
112
113
  - **OCR Integration**: Tesseract OCR with markdown output (default) and table extraction from scanned documents
@@ -0,0 +1,60 @@
1
+ kreuzberg/__init__.py,sha256=-IHDHXKE7q43MBr_KklpqvhNPjJRhX3qFpMge8kuViE,1467
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
+ kreuzberg/_config.py,sha256=2LI5z9gXniqO4afrMmbZfMdhlT2701O5OlGKkrMo-bM,12385
5
+ kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
+ kreuzberg/_document_classification.py,sha256=zgBjqiHCqhtz74JLtt_V8kk6HQTkK5egGWdAGk9dOEQ,5672
7
+ kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
+ kreuzberg/_gmft.py,sha256=a7KDXbZM0PxyFpAIjM0xMRvxzoMo4fTQuGlFNa8uXBU,20502
9
+ kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
+ kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
11
+ kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
+ kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
+ kreuzberg/_types.py,sha256=7hj2KWohuSKQ9cJd_VCuSeciuyuOC5MdSkS1s5QaPOg,44870
14
+ kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
+ kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
+ kreuzberg/extraction.py,sha256=gDkwuj_omQ8OCx4RALD0NjasxMhZLhIju7odK7wMwDM,17789
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
+ kreuzberg/_api/main.py,sha256=_r2R_-4zBkyJBn0bcPWogVEDICxWWt5_FFiQIF-r4N4,15463
21
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
23
+ kreuzberg/_extractors/_email.py,sha256=8tsHycVBQ2KSSqp2TZ9a0O1Yxjwe0YvE2GVxUajCVz4,8478
24
+ kreuzberg/_extractors/_html.py,sha256=7fzNr7-BJ4IND7PWTlEIiqfeKDUb_ZjWO3KDdU3umgI,5151
25
+ kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
+ kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
27
+ kreuzberg/_extractors/_pdf.py,sha256=MKfihJcveulfkMQc-s5VUCgvK1aw8EyCbUMRwJo_KoM,23225
28
+ kreuzberg/_extractors/_presentation.py,sha256=MZd4Ft2g5oIrEZ1h3ZWsQTW_VpHI2yi4g4Tdh5iw_7I,10466
29
+ kreuzberg/_extractors/_spread_sheet.py,sha256=Q2uXvotwqvWiYkIPrtnVL2Ci9ZA7fmTgN6tDN_huwdE,12801
30
+ kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
31
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
+ kreuzberg/_mcp/server.py,sha256=n_bfNPSU_SvXVJ5z05oKVj2sFv2uRYoe3ZZzyVOHQOI,17608
33
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
+ kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
+ kreuzberg/_ocr/_easyocr.py,sha256=6Naqy9JvL96Mm9gw4s-4nRsubd0Z0t8Zn6VC_HInUfc,14577
36
+ kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
37
+ kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
+ kreuzberg/_ocr/_tesseract.py,sha256=fq4qdrzPss9ZaIneUxmwq9x3sFJe8FEi__DLOa1AXN4,50945
39
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
41
+ kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
42
+ kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
43
+ kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
44
+ kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
45
+ kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
46
+ kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
47
+ kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
48
+ kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
49
+ kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
50
+ kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
51
+ kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
52
+ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
53
+ kreuzberg/_utils/_sync.py,sha256=O4ukJfo8hIr72kaoRvvJjbkBeorIw0SUfkovv0YXa7k,3170
54
+ kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
55
+ kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
56
+ kreuzberg-3.15.0.dist-info/METADATA,sha256=-4oGIVQAYBB8BSPbwA_MA1LK-ZROaCxwX6g-re4ZtCQ,12246
57
+ kreuzberg-3.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
+ kreuzberg-3.15.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
59
+ kreuzberg-3.15.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
60
+ kreuzberg-3.15.0.dist-info/RECORD,,
@@ -1,58 +0,0 @@
1
- kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
2
- kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
- kreuzberg/_chunker.py,sha256=tr9_KUYTSLauFois3MsB-A-0hGcTT8hTQFrqNRTii-I,1373
4
- kreuzberg/_config.py,sha256=2LI5z9gXniqO4afrMmbZfMdhlT2701O5OlGKkrMo-bM,12385
5
- kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
- kreuzberg/_document_classification.py,sha256=Mz_s2GJGsEl7MQ-67BPoGYCZibTy9Sw0PScUZKBjKOA,5736
7
- kreuzberg/_entity_extraction.py,sha256=5YpPnqoJ5aiHd_sy4bN4-Ngiq79RhCV6yaUQE8joGXo,3503
8
- kreuzberg/_gmft.py,sha256=a7KDXbZM0PxyFpAIjM0xMRvxzoMo4fTQuGlFNa8uXBU,20502
9
- kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
- kreuzberg/_mime_types.py,sha256=kGBDSMO4XPgzUKC7iaBeChCtRQXZ9_zXq6eJydejX_k,7739
11
- kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
- kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
- kreuzberg/_types.py,sha256=yw8ZzCgwp8T4byh00gdSlABDtRwro6H1pemQsO5IZMQ,39132
14
- kreuzberg/cli.py,sha256=Ob0IfqWcaiM09pFdC6wTpdSeql0SGZDxBxfrEhJAGmo,13501
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=qT-Ziw5FmMqcPT88VrglikL1RASSJCf5W7xP6L9Vi5s,17673
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/main.py,sha256=bZLaQpW8eoTFGvCGJgFodALy4rDfe9kuY1oj9OKPQpU,10792
20
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- kreuzberg/_extractors/_base.py,sha256=i2FvAhRnamEtBb4a-C7pfcdWIXnkEBw0saMQu7h1_RQ,2069
22
- kreuzberg/_extractors/_email.py,sha256=jn_8J4BASKJ7zFHBG0PgxNe3OT4pjmEM2tTKX8y_0AE,5887
23
- kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
24
- kreuzberg/_extractors/_image.py,sha256=lFPoxAf7_Zbx-1t8W4vU2bhHauiNGOAFbZxr_2gNUsw,3991
25
- kreuzberg/_extractors/_pandoc.py,sha256=-Ai4S1cXs7F6yeonb_7Y7_ZoWHn29E2oP1WlPtM-4HM,22505
26
- kreuzberg/_extractors/_pdf.py,sha256=naJ_AgtAgtGIjAqiU4_G7lgftKWhUjZDLVILSG2AyVc,18757
27
- kreuzberg/_extractors/_presentation.py,sha256=ULGkt7dzeA9sYSEhpAucKZmkdv9EubzeZtOjoLP3Z2E,6994
28
- kreuzberg/_extractors/_spread_sheet.py,sha256=eBAx_OwoyRqMzmD4Z07UlOBwcXckymgvj_0o7di6thA,12715
29
- kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
30
- kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
31
- kreuzberg/_mcp/server.py,sha256=YPMJp6xnZ3DC32NEdX5Gqf3vwxsHZxXxUxZ6jghpv6I,5688
32
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
33
- kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
34
- kreuzberg/_ocr/_easyocr.py,sha256=XbgpGt5tkE4xHleIGvV1cHlpOQTp43rSXBO1CyIyKTg,14599
35
- kreuzberg/_ocr/_paddleocr.py,sha256=hfc6Zi2eSUYTVVF9y9D1P2_pLiLXPfFRoJ6QDJ6oZag,15017
36
- kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
37
- kreuzberg/_ocr/_tesseract.py,sha256=QEKK_PDZnNiZRgpklOgMXB-cObJy6C-HuxL6Gza5Z3c,49136
38
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- kreuzberg/_utils/_cache.py,sha256=qeyI6rJOQlKtdHjJeOjUxx31eItak_drrNn8Cf8HbN8,13956
40
- kreuzberg/_utils/_device.py,sha256=UxGkSTN3Up-Zn43CSyvf8CozW2xAF05Cm01LWA2FZmg,8263
41
- kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
42
- kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
43
- kreuzberg/_utils/_image_preprocessing.py,sha256=2u0A28M07F9XlYebTG5salOUVEE3YT3m8fiR8Z2ZM8E,12326
44
- kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
45
- kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
46
- kreuzberg/_utils/_process_pool.py,sha256=7p8Co1w-Tvh2MUdxMcPMpvOikumrb0nN2ApQVytV-_c,6726
47
- kreuzberg/_utils/_quality.py,sha256=f7NbyZysyJQD8jKCNWhogvluU9A7GdEYhMsDBeMbGAA,5412
48
- kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
49
- kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
50
- kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
51
- kreuzberg/_utils/_sync.py,sha256=OWiciXPTGHIxgiGoHI2AglZ1siTNT-nU_JCgHPNzzHk,2196
52
- kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
53
- kreuzberg/_utils/_tmp.py,sha256=wnOInBkcuQoxI1vBLvNv9NqbRCEu9Y03qfOjqQuAk3s,841
54
- kreuzberg-3.14.0.dist-info/METADATA,sha256=68rRivXnf8n_F9lqekOydDOd8sehWpHpbbKzRup7XDc,12127
55
- kreuzberg-3.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- kreuzberg-3.14.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
57
- kreuzberg-3.14.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
58
- kreuzberg-3.14.0.dist-info/RECORD,,