kreuzberg 3.11.4__py3-none-any.whl → 3.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. kreuzberg/__init__.py +14 -13
  2. kreuzberg/__main__.py +0 -2
  3. kreuzberg/_api/main.py +119 -9
  4. kreuzberg/_config.py +248 -204
  5. kreuzberg/_document_classification.py +0 -8
  6. kreuzberg/_entity_extraction.py +1 -93
  7. kreuzberg/_extractors/_base.py +0 -5
  8. kreuzberg/_extractors/_email.py +1 -11
  9. kreuzberg/_extractors/_html.py +9 -12
  10. kreuzberg/_extractors/_image.py +1 -23
  11. kreuzberg/_extractors/_pandoc.py +10 -89
  12. kreuzberg/_extractors/_pdf.py +39 -92
  13. kreuzberg/_extractors/_presentation.py +0 -17
  14. kreuzberg/_extractors/_spread_sheet.py +13 -53
  15. kreuzberg/_extractors/_structured.py +1 -4
  16. kreuzberg/_gmft.py +14 -138
  17. kreuzberg/_language_detection.py +1 -22
  18. kreuzberg/_mcp/__init__.py +0 -2
  19. kreuzberg/_mcp/server.py +3 -10
  20. kreuzberg/_mime_types.py +1 -2
  21. kreuzberg/_ocr/_easyocr.py +21 -108
  22. kreuzberg/_ocr/_paddleocr.py +16 -94
  23. kreuzberg/_ocr/_table_extractor.py +260 -0
  24. kreuzberg/_ocr/_tesseract.py +906 -264
  25. kreuzberg/_playa.py +5 -4
  26. kreuzberg/_types.py +638 -40
  27. kreuzberg/_utils/_cache.py +88 -90
  28. kreuzberg/_utils/_device.py +0 -18
  29. kreuzberg/_utils/_document_cache.py +0 -2
  30. kreuzberg/_utils/_errors.py +0 -3
  31. kreuzberg/_utils/_pdf_lock.py +0 -2
  32. kreuzberg/_utils/_process_pool.py +19 -19
  33. kreuzberg/_utils/_quality.py +0 -43
  34. kreuzberg/_utils/_ref.py +48 -0
  35. kreuzberg/_utils/_serialization.py +0 -5
  36. kreuzberg/_utils/_string.py +9 -39
  37. kreuzberg/_utils/_sync.py +0 -1
  38. kreuzberg/_utils/_table.py +50 -57
  39. kreuzberg/cli.py +54 -74
  40. kreuzberg/extraction.py +39 -32
  41. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/METADATA +17 -14
  42. kreuzberg-3.13.0.dist-info/RECORD +56 -0
  43. kreuzberg-3.11.4.dist-info/RECORD +0 -54
  44. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/WHEEL +0 -0
  45. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/entry_points.txt +0 -0
  46. {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg/extraction.py CHANGED
@@ -151,20 +151,22 @@ async def extract_file(
151
151
  """
152
152
  cache = get_document_cache()
153
153
  path = Path(file_path)
154
- cached_result = cache.get(path, config)
155
- if cached_result is not None:
156
- return cached_result
157
154
 
158
- if cache.is_processing(path, config):
159
- event = cache.mark_processing(path, config)
160
- await anyio.to_thread.run_sync(event.wait) # pragma: no cover
161
-
162
- # Try cache again after waiting for other process to complete # ~keep
163
- cached_result = cache.get(path, config) # pragma: no cover
164
- if cached_result is not None: # pragma: no cover
155
+ if config.use_cache:
156
+ cached_result = cache.get(path, config)
157
+ if cached_result is not None:
165
158
  return cached_result
166
159
 
167
- cache.mark_processing(path, config)
160
+ if cache.is_processing(path, config):
161
+ event = cache.mark_processing(path, config)
162
+ await anyio.to_thread.run_sync(event.wait) # pragma: no cover
163
+
164
+ # Try cache again after waiting for other process to complete # ~keep
165
+ cached_result = cache.get(path, config) # pragma: no cover
166
+ if cached_result is not None: # pragma: no cover
167
+ return cached_result
168
+
169
+ cache.mark_processing(path, config)
168
170
 
169
171
  try:
170
172
  if not path.exists():
@@ -183,11 +185,13 @@ async def extract_file(
183
185
 
184
186
  result = await _validate_and_post_process_async(result=result, config=config, file_path=path)
185
187
 
186
- cache.set(path, config, result)
188
+ if config.use_cache:
189
+ cache.set(path, config, result)
187
190
 
188
191
  return result
189
192
  finally:
190
- cache.mark_complete(path, config)
193
+ if config.use_cache:
194
+ cache.mark_complete(path, config)
191
195
 
192
196
 
193
197
  async def batch_extract_file(
@@ -224,7 +228,7 @@ async def batch_extract_file(
224
228
  content=f"Error: {type(e).__name__}: {e!s}",
225
229
  mime_type="text/plain",
226
230
  metadata={ # type: ignore[typeddict-unknown-key]
227
- "error": True,
231
+ "error": f"{type(e).__name__}: {e!s}",
228
232
  "error_context": create_error_context(
229
233
  operation="batch_extract_file",
230
234
  file_path=path,
@@ -273,7 +277,7 @@ async def batch_extract_bytes(
273
277
  content=f"Error: {type(e).__name__}: {e!s}",
274
278
  mime_type="text/plain",
275
279
  metadata={ # type: ignore[typeddict-unknown-key]
276
- "error": True,
280
+ "error": f"{type(e).__name__}: {e!s}",
277
281
  "error_context": create_error_context(
278
282
  operation="batch_extract_bytes",
279
283
  error=e,
@@ -336,20 +340,22 @@ def extract_file_sync(
336
340
  """
337
341
  cache = get_document_cache()
338
342
  path = Path(file_path)
339
- cached_result = cache.get(path, config)
340
- if cached_result is not None:
341
- return cached_result
342
343
 
343
- if cache.is_processing(path, config):
344
- event = cache.mark_processing(path, config)
345
- event.wait() # pragma: no cover
346
-
347
- # Try cache again after waiting for other process to complete # ~keep
348
- cached_result = cache.get(path, config) # pragma: no cover
349
- if cached_result is not None: # pragma: no cover
344
+ if config.use_cache:
345
+ cached_result = cache.get(path, config)
346
+ if cached_result is not None:
350
347
  return cached_result
351
348
 
352
- cache.mark_processing(path, config)
349
+ if cache.is_processing(path, config):
350
+ event = cache.mark_processing(path, config)
351
+ event.wait() # pragma: no cover
352
+
353
+ # Try cache again after waiting for other process to complete # ~keep
354
+ cached_result = cache.get(path, config) # pragma: no cover
355
+ if cached_result is not None: # pragma: no cover
356
+ return cached_result
357
+
358
+ cache.mark_processing(path, config)
353
359
 
354
360
  try:
355
361
  if not path.exists():
@@ -360,7 +366,7 @@ def extract_file_sync(
360
366
  result = extractor.extract_path_sync(Path(file_path))
361
367
  else:
362
368
  result = ExtractionResult(
363
- content=Path(file_path).read_text(),
369
+ content=Path(file_path).read_text(encoding="utf-8"),
364
370
  chunks=[],
365
371
  mime_type=mime_type,
366
372
  metadata={},
@@ -368,11 +374,13 @@ def extract_file_sync(
368
374
 
369
375
  result = _validate_and_post_process_sync(result=result, config=config, file_path=path)
370
376
 
371
- cache.set(path, config, result)
377
+ if config.use_cache:
378
+ cache.set(path, config, result)
372
379
 
373
380
  return result
374
381
  finally:
375
- cache.mark_complete(path, config)
382
+ if config.use_cache:
383
+ cache.mark_complete(path, config)
376
384
 
377
385
 
378
386
  def batch_extract_file_sync(
@@ -404,7 +412,7 @@ def batch_extract_file_sync(
404
412
  content=f"Error: {type(e).__name__}: {e!s}",
405
413
  mime_type="text/plain",
406
414
  metadata={ # type: ignore[typeddict-unknown-key]
407
- "error": True,
415
+ "error": f"{type(e).__name__}: {e!s}",
408
416
  "error_context": create_error_context(
409
417
  operation="batch_extract_file_sync",
410
418
  file_path=file_path,
@@ -455,7 +463,7 @@ def batch_extract_bytes_sync(
455
463
  content=f"Error: {type(e).__name__}: {e!s}",
456
464
  mime_type="text/plain",
457
465
  metadata={ # type: ignore[typeddict-unknown-key]
458
- "error": True,
466
+ "error": f"{type(e).__name__}: {e!s}",
459
467
  "error_context": create_error_context(
460
468
  operation="batch_extract_bytes_sync",
461
469
  error=e,
@@ -469,7 +477,6 @@ def batch_extract_bytes_sync(
469
477
  return (index, error_result)
470
478
 
471
479
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
472
- # Avoid creating intermediate list, use enumerate directly
473
480
  future_to_index = {executor.submit(extract_single, (i, content)): i for i, content in enumerate(contents)}
474
481
 
475
482
  results: list[ExtractionResult] = [None] * len(contents) # type: ignore[list-item]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.11.4
3
+ Version: 3.13.0
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -31,15 +31,16 @@ Requires-Python: >=3.10
31
31
  Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
- Requires-Dist: html-to-markdown[lxml]>=1.9.0
34
+ Requires-Dist: html-to-markdown[lxml]>=1.9.1
35
35
  Requires-Dist: mcp>=1.13.0
36
36
  Requires-Dist: msgspec>=0.18.0
37
37
  Requires-Dist: playa-pdf>=0.7.0
38
+ Requires-Dist: polars>=1.33.0
38
39
  Requires-Dist: psutil>=7.0.0
39
40
  Requires-Dist: pypdfium2==4.30.0
40
41
  Requires-Dist: python-calamine>=0.3.2
41
42
  Requires-Dist: python-pptx>=1.0.2
42
- Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
43
+ Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
43
44
  Provides-Extra: additional-extensions
44
45
  Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
45
46
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
@@ -54,7 +55,6 @@ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all
54
55
  Requires-Dist: mailparse>=1.0.15; extra == 'all'
55
56
  Requires-Dist: paddleocr>=3.2.0; extra == 'all'
56
57
  Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
57
- Requires-Dist: pandas>=2.3.2; extra == 'all'
58
58
  Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
59
59
  Requires-Dist: rich>=14.1.0; extra == 'all'
60
60
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
@@ -73,7 +73,6 @@ Provides-Extra: crypto
73
73
  Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
74
74
  Provides-Extra: document-classification
75
75
  Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
76
- Requires-Dist: pandas>=2.3.2; extra == 'document-classification'
77
76
  Provides-Extra: easyocr
78
77
  Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
79
78
  Provides-Extra: entity-extraction
@@ -109,8 +108,7 @@ Description-Content-Type: text/markdown
109
108
  - **Text Extraction**: High-fidelity text extraction preserving document structure and formatting
110
109
  - **Metadata Extraction**: Comprehensive metadata including author, creation date, language, and document properties
111
110
  - **Format Support**: 18 document types including PDF, Microsoft Office, images, HTML, and structured data formats
112
- - **OCR Integration**: Multiple OCR engines (Tesseract, EasyOCR, PaddleOCR) with automatic fallback
113
- - **Table Detection**: Structured table extraction with cell-level precision via GMFT integration
111
+ - **OCR Integration**: Tesseract OCR with markdown output (default) and table extraction from scanned documents
114
112
  - **Document Classification**: Automatic document type detection (contracts, forms, invoices, receipts, reports)
115
113
 
116
114
  ### Technical Architecture
@@ -138,8 +136,8 @@ Kreuzberg leverages established open source technologies:
138
136
  # Extract text from any file to text format
139
137
  uvx kreuzberg extract document.pdf > output.txt
140
138
 
141
- # With all features (OCR, table extraction, etc.)
142
- uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
139
+ # With all features (chunking, language detection, etc.)
140
+ uvx kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
143
141
 
144
142
  # Extract with rich metadata
145
143
  uvx kreuzberg extract report.pdf --show-metadata --output-format json
@@ -179,10 +177,15 @@ print(f"Keywords: {result.metadata.keywords}")
179
177
 
180
178
  ### Docker
181
179
 
180
+ Two optimized images available:
181
+
182
182
  ```bash
183
- # Run the REST API
183
+ # Base image (API + CLI + multilingual OCR)
184
184
  docker run -p 8000:8000 goldziher/kreuzberg
185
185
 
186
+ # Core image (+ chunking + crypto + document classification + language detection)
187
+ docker run -p 8000:8000 goldziher/kreuzberg-core:latest
188
+
186
189
  # Extract via API
187
190
  curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
188
191
  ```
@@ -196,7 +199,7 @@ curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
196
199
  **Add to Claude Desktop with one command:**
197
200
 
198
201
  ```bash
199
- claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
202
+ claude mcp add kreuzberg uvx kreuzberg-mcp
200
203
  ```
201
204
 
202
205
  **Or configure manually in `claude_desktop_config.json`:**
@@ -206,7 +209,7 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
206
209
  "mcpServers": {
207
210
  "kreuzberg": {
208
211
  "command": "uvx",
209
- "args": ["--from", "kreuzberg[all]", "kreuzberg-mcp"]
212
+ "args": ["kreuzberg-mcp"]
210
213
  }
211
214
  }
212
215
  }
@@ -215,8 +218,8 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
215
218
  **MCP capabilities:**
216
219
 
217
220
  - Extract text from PDFs, images, Office docs, and more
218
- - Full OCR support with multiple engines
219
- - Table extraction and metadata parsing
221
+ - Multilingual OCR support with Tesseract
222
+ - Metadata parsing and language detection
220
223
 
221
224
  📖 **[MCP Documentation](https://kreuzberg.dev/user-guide/mcp-server/)**
222
225
 
@@ -0,0 +1,56 @@
1
+ kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
4
+ kreuzberg/_config.py,sha256=dSTumnpleMeUjUabWgAH7WlhTkdNG3eeMv8FSFmUaEI,15776
5
+ kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
+ kreuzberg/_document_classification.py,sha256=NZ-6tQtVa1OgigC7xf30hAsnL5_gi9ak9X2XYdsCfTI,6361
7
+ kreuzberg/_entity_extraction.py,sha256=QFIPQ_fovEnEezpS6W4pwpjTA2PqS7TUCD9AKf8sAyc,4666
8
+ kreuzberg/_gmft.py,sha256=60WpPTf7jocU-kmkBe-pBytl7l58aQzd-Aw2_Hlioug,21481
9
+ kreuzberg/_language_detection.py,sha256=yLUliJOUyofVma_q6FwzG9Ck4-XX3AEjxleTHrqi8R4,2445
10
+ kreuzberg/_mime_types.py,sha256=fwtPKtp2XhCLT686qF26PBMeOqcVJroKPwkp7JgaM0E,8462
11
+ kreuzberg/_playa.py,sha256=1viLRqgcDWvaPo5ZsDPO2gqHFSBApOYortTV_SPVK9k,12190
12
+ kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
13
+ kreuzberg/_types.py,sha256=WFUFY1S7SL7kTfHCX-zGASLYT94FxLD71C9vGUzFOiA,38922
14
+ kreuzberg/cli.py,sha256=MLeWoMcLoN6WnkbyRbOY-2dqp-vNZf7Nb-K_R5F5CoU,12730
15
+ kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
+ kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
20
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ kreuzberg/_extractors/_base.py,sha256=EZTEJzwJxwu_yYFQ5QlZVNQMPCcli7yyUB4T5mFotCY,4209
22
+ kreuzberg/_extractors/_email.py,sha256=mVi_VDmiFhe6NgiWxJDYt4DQiP6jVs5dP8BsPClm3WQ,6108
23
+ kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
24
+ kreuzberg/_extractors/_image.py,sha256=UZEOmKNAS4KjaX38iYq2Ux6Mta3juCF1MzWNeBxpPE8,3414
25
+ kreuzberg/_extractors/_pandoc.py,sha256=zumwImIXwD3ziPhYxt0EQct5sSMy5lQiY6KnPSDxBTU,24183
26
+ kreuzberg/_extractors/_pdf.py,sha256=766O7rXAeAJ42vPpWbGpW_WgHXm48eWwX09l3aqjKeM,18064
27
+ kreuzberg/_extractors/_presentation.py,sha256=BJdEM9jsuAd0vb-PIRwNMcRj4xVjItb5kpOpnjsCBi0,10175
28
+ kreuzberg/_extractors/_spread_sheet.py,sha256=wqAV-Stqfd4hXs5ock-chqBEdzv4voSgT1uFUO1cIU0,12075
29
+ kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
30
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
31
+ kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
32
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
33
+ kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
34
+ kreuzberg/_ocr/_easyocr.py,sha256=CtiHGx_BmuUwZhC7bScYF9mwnAxRrLWJ-X70fuwFTjk,14079
35
+ kreuzberg/_ocr/_paddleocr.py,sha256=wCuIQ_yxPWE9hukiehYNRdt00Rb2h6pWdfqPS8hI2s0,14297
36
+ kreuzberg/_ocr/_table_extractor.py,sha256=MeQLQn_bRco5OAcUoy613ZbZLCDBRJY8uHH_bUBSP8I,7613
37
+ kreuzberg/_ocr/_tesseract.py,sha256=i_UTjOmrFxZbtmXxrQIsE78wtZLTyZph0i0jDQc4EMA,56916
38
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ kreuzberg/_utils/_cache.py,sha256=fDqFp_54-Kyn3_4VkXkhovvNIB2osKqXlS13MlORrU8,14539
40
+ kreuzberg/_utils/_device.py,sha256=35xQvrLSPISJlWicQGknoBjkwdalwVxiJbzyxwuwOVo,9747
41
+ kreuzberg/_utils/_document_cache.py,sha256=CpCdJVd8SYLjfwm0ozSM8mx5x8i9vVDet3BlEUpzuZY,6920
42
+ kreuzberg/_utils/_errors.py,sha256=ctD-s1q7vbEgqHQ3OVJiEOODDLTd2LvrM3z6o37zrGI,6395
43
+ kreuzberg/_utils/_pdf_lock.py,sha256=mHB1A4Fo_nSfgdqUNEWODH9b5tNFqpEHcNE6rT41dGE,1886
44
+ kreuzberg/_utils/_process_pool.py,sha256=ebuMPmHXPkWaLWjgAkeaONvAZo974PhfENN8pnPTCco,8415
45
+ kreuzberg/_utils/_quality.py,sha256=m3SIXGDY9pfRmh3XeKdZWT1vBz7issH0SfKsutEuRxw,5833
46
+ kreuzberg/_utils/_ref.py,sha256=uP_S3x0AQH2Nyjo1tYEj7N_u9hGzYVewdjch6a8Fv5I,1458
47
+ kreuzberg/_utils/_serialization.py,sha256=duKP5OuBvi-m6ljQOhoyuJU7sl2WPnov8yJDpYuDArw,2052
48
+ kreuzberg/_utils/_string.py,sha256=yrcwHHl23FxWrNoFXkmR3icgivfvbLRvkqQek8F3qqI,5020
49
+ kreuzberg/_utils/_sync.py,sha256=mc-K2y_sc6mG-HOswlHTXAWaEzgisEERvq9PPw2dAw4,4869
50
+ kreuzberg/_utils/_table.py,sha256=dYM_dWNHRCXcWOhSQBnahOJaBXyuQFyYX9arHrH4TF8,7555
51
+ kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
52
+ kreuzberg-3.13.0.dist-info/METADATA,sha256=896BWDLD6ApGiOQFKXMqQezC4qgKRUxjMqbZVWxBoJ0,12098
53
+ kreuzberg-3.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
54
+ kreuzberg-3.13.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
55
+ kreuzberg-3.13.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
56
+ kreuzberg-3.13.0.dist-info/RECORD,,
@@ -1,54 +0,0 @@
1
- kreuzberg/__init__.py,sha256=0OJ_jNKbS6GxzWC5-EfRCiE80as_ya0-wwyNsTYbxzY,1721
2
- kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
3
- kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
4
- kreuzberg/_config.py,sha256=Au521UiR7vcQs_8_hhoWIfmDDMJIrDM3XZUB_qHfCmo,14035
5
- kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
- kreuzberg/_document_classification.py,sha256=qFGmwvUMhnNAvNNJO7E-huPx-Ps-_DWxdNxsozIzgaw,6870
7
- kreuzberg/_entity_extraction.py,sha256=Oa1T-9mptimpOHtcda-GtrVYH9PFy7DSJj3thJZUD7k,7902
8
- kreuzberg/_gmft.py,sha256=6P4gSSmU39puaYAKmdGr9ALf0USYTwRDuvvhG1LmI24,26441
9
- kreuzberg/_language_detection.py,sha256=_Ng2aHgPxOHFgd507gVNiIGVmnxxbpgYwsO0bD0yTzg,3315
10
- kreuzberg/_mime_types.py,sha256=2warRVqfBUNIg8JBg8yP4pRqaMPvwINosHMkJwtH_Fc,8488
11
- kreuzberg/_playa.py,sha256=_IPrUSWwSfDQlWXOpKlauV0D9MhGrujGP5kmQ0U3L0g,12188
12
- kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
13
- kreuzberg/_types.py,sha256=bMaU6VuoqwOpW6ufshA-DWpNw6t9EokjEDEfFsznvdo,15389
14
- kreuzberg/cli.py,sha256=nG1CD_h50EWLmDbrb0_DffRl25uTCKeCS6_gRVpjEdU,12578
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=Kt1mOxdlOb35yVOdpdhiRPuTgA9BW_TTG9qwCkSxSkc,17332
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/main.py,sha256=8VwxRlIXwnPs7ZYm0saUZsNOjevEAWJQpNreG-X7ZpE,3273
20
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- kreuzberg/_extractors/_base.py,sha256=H_nwynBX3fozncVjV13c329x5eCLl5r7nyVTLQyDAzI,4396
22
- kreuzberg/_extractors/_email.py,sha256=Jpr4NFef640uVgNFkR1or-omy8RVt-NOHUYgWRDjyBo,6753
23
- kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
24
- kreuzberg/_extractors/_image.py,sha256=Iz1JpvGqcYyh9g4zO_bMZG3E9S39KNHFu8PrXDRXeOk,4513
25
- kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
26
- kreuzberg/_extractors/_pdf.py,sha256=OflyvwEkuFLmw8E3si35MCGH31fvd5o50VdMmu5QRVs,19884
27
- kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
28
- kreuzberg/_extractors/_spread_sheet.py,sha256=iagiyJsnl-89OP1eqmEv8jWl7gZBJm2x0YOyqBgLasA,13733
29
- kreuzberg/_extractors/_structured.py,sha256=PbNaXd-_PUPsE0yZkISod_vLBokbWdVTKEPpEmqaEMM,5787
30
- kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
31
- kreuzberg/_mcp/server.py,sha256=Dxed80MqZsYCFyYo0QdArpKE4H8DhpKY34fijdzV5uw,8731
32
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
33
- kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
34
- kreuzberg/_ocr/_easyocr.py,sha256=eU4MA_B_-cvq_IhpCeYUruL_kqcfm8maNZKP7zvVQHI,17512
35
- kreuzberg/_ocr/_paddleocr.py,sha256=I7ns6L56a2Ol460Bge6e0hpc2AkkwDepLcpCsABj5Dc,17609
36
- kreuzberg/_ocr/_tesseract.py,sha256=teLMH1pBhpcmEXDcyZlv56hYINLGMuaKZ0CQtcu_czQ,31510
37
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- kreuzberg/_utils/_cache.py,sha256=hYd_a5Ni5VJBE1XU_eN9gvQ5gg0FRsdbRgmJe-OIJHM,15253
39
- kreuzberg/_utils/_device.py,sha256=JI9p9TGSfQHEi2SL-ovOXMr9RUnVq-RrEly89OvmQ5w,10485
40
- kreuzberg/_utils/_document_cache.py,sha256=ka90JIT-FXUMOv8z2u3fztQgZZb2XQDHTMnBi32mySA,7005
41
- kreuzberg/_utils/_errors.py,sha256=UsktQ_p7eOj9crPsFDg8HgRSE5-IpuFC7y1e6dDI_fY,6503
42
- kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
43
- kreuzberg/_utils/_process_pool.py,sha256=4BqhmRspwMyPT2EBfTu_rrn7v722wlMLD8qlYvYsc00,8621
44
- kreuzberg/_utils/_quality.py,sha256=-nKzj5n7yJDYrvl556oq2T5S5oKMEOrjpcRMlZ00Jqo,7668
45
- kreuzberg/_utils/_serialization.py,sha256=cqqxqN2cmtndBhIr4v2wqiMwnNadnKhvuN7EUj3i18M,2290
46
- kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6800
47
- kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
48
- kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
49
- kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
50
- kreuzberg-3.11.4.dist-info/METADATA,sha256=l3d8PyVfX_aEgXl5ykkuRHJi-8Qzhu4_KcHDYOK2RYg,12136
51
- kreuzberg-3.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
52
- kreuzberg-3.11.4.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
53
- kreuzberg-3.11.4.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
54
- kreuzberg-3.11.4.dist-info/RECORD,,