PyPI - kreuzberg - Versions diffs - 3.11.4__py3-none-any.whl → 3.13.0__py3-none-any.whl - Mend

kreuzberg-3.11.4-py3-none-any.whl → kreuzberg-3.13.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

kreuzberg/__init__.py +14 -13
kreuzberg/__main__.py +0 -2
kreuzberg/_api/main.py +119 -9
kreuzberg/_config.py +248 -204
kreuzberg/_document_classification.py +0 -8
kreuzberg/_entity_extraction.py +1 -93
kreuzberg/_extractors/_base.py +0 -5
kreuzberg/_extractors/_email.py +1 -11
kreuzberg/_extractors/_html.py +9 -12
kreuzberg/_extractors/_image.py +1 -23
kreuzberg/_extractors/_pandoc.py +10 -89
kreuzberg/_extractors/_pdf.py +39 -92
kreuzberg/_extractors/_presentation.py +0 -17
kreuzberg/_extractors/_spread_sheet.py +13 -53
kreuzberg/_extractors/_structured.py +1 -4
kreuzberg/_gmft.py +14 -138
kreuzberg/_language_detection.py +1 -22
kreuzberg/_mcp/__init__.py +0 -2
kreuzberg/_mcp/server.py +3 -10
kreuzberg/_mime_types.py +1 -2
kreuzberg/_ocr/_easyocr.py +21 -108
kreuzberg/_ocr/_paddleocr.py +16 -94
kreuzberg/_ocr/_table_extractor.py +260 -0
kreuzberg/_ocr/_tesseract.py +906 -264
kreuzberg/_playa.py +5 -4
kreuzberg/_types.py +638 -40
kreuzberg/_utils/_cache.py +88 -90
kreuzberg/_utils/_device.py +0 -18
kreuzberg/_utils/_document_cache.py +0 -2
kreuzberg/_utils/_errors.py +0 -3
kreuzberg/_utils/_pdf_lock.py +0 -2
kreuzberg/_utils/_process_pool.py +19 -19
kreuzberg/_utils/_quality.py +0 -43
kreuzberg/_utils/_ref.py +48 -0
kreuzberg/_utils/_serialization.py +0 -5
kreuzberg/_utils/_string.py +9 -39
kreuzberg/_utils/_sync.py +0 -1
kreuzberg/_utils/_table.py +50 -57
kreuzberg/cli.py +54 -74
kreuzberg/extraction.py +39 -32
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/METADATA +17 -14
kreuzberg-3.13.0.dist-info/RECORD +56 -0
kreuzberg-3.11.4.dist-info/RECORD +0 -54
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/WHEEL +0 -0
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/entry_points.txt +0 -0
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/licenses/LICENSE +0 -0

kreuzberg/extraction.py CHANGED Viewed

@@ -151,20 +151,22 @@ async def extract_file(
     """
     cache = get_document_cache()
     path = Path(file_path)
-    cached_result = cache.get(path, config)
-    if cached_result is not None:
-        return cached_result
-    if cache.is_processing(path, config):
-        event = cache.mark_processing(path, config)
-        await anyio.to_thread.run_sync(event.wait)  # pragma: no cover
-        # Try cache again after waiting for other process to complete  # ~keep
-        cached_result = cache.get(path, config)  # pragma: no cover
-        if cached_result is not None:  # pragma: no cover
+    if config.use_cache:
+        cached_result = cache.get(path, config)
+        if cached_result is not None:
             return cached_result
-    cache.mark_processing(path, config)
+        if cache.is_processing(path, config):
+            event = cache.mark_processing(path, config)
+            await anyio.to_thread.run_sync(event.wait)  # pragma: no cover
+            # Try cache again after waiting for other process to complete  # ~keep
+            cached_result = cache.get(path, config)  # pragma: no cover
+            if cached_result is not None:  # pragma: no cover
+                return cached_result
+        cache.mark_processing(path, config)
     try:
         if not path.exists():
@@ -183,11 +185,13 @@ async def extract_file(
         result = await _validate_and_post_process_async(result=result, config=config, file_path=path)
-        cache.set(path, config, result)
+        if config.use_cache:
+            cache.set(path, config, result)
         return result
     finally:
-        cache.mark_complete(path, config)
+        if config.use_cache:
+            cache.mark_complete(path, config)
 async def batch_extract_file(
@@ -224,7 +228,7 @@ async def batch_extract_file(
                     content=f"Error: {type(e).__name__}: {e!s}",
                     mime_type="text/plain",
                     metadata={  # type: ignore[typeddict-unknown-key]
-                        "error": True,
+                        "error": f"{type(e).__name__}: {e!s}",
                         "error_context": create_error_context(
                             operation="batch_extract_file",
                             file_path=path,
@@ -273,7 +277,7 @@ async def batch_extract_bytes(
                     content=f"Error: {type(e).__name__}: {e!s}",
                     mime_type="text/plain",
                     metadata={  # type: ignore[typeddict-unknown-key]
-                        "error": True,
+                        "error": f"{type(e).__name__}: {e!s}",
                         "error_context": create_error_context(
                             operation="batch_extract_bytes",
                             error=e,
@@ -336,20 +340,22 @@ def extract_file_sync(
     """
     cache = get_document_cache()
     path = Path(file_path)
-    cached_result = cache.get(path, config)
-    if cached_result is not None:
-        return cached_result
-    if cache.is_processing(path, config):
-        event = cache.mark_processing(path, config)
-        event.wait()  # pragma: no cover
-        # Try cache again after waiting for other process to complete  # ~keep
-        cached_result = cache.get(path, config)  # pragma: no cover
-        if cached_result is not None:  # pragma: no cover
+    if config.use_cache:
+        cached_result = cache.get(path, config)
+        if cached_result is not None:
             return cached_result
-    cache.mark_processing(path, config)
+        if cache.is_processing(path, config):
+            event = cache.mark_processing(path, config)
+            event.wait()  # pragma: no cover
+            # Try cache again after waiting for other process to complete  # ~keep
+            cached_result = cache.get(path, config)  # pragma: no cover
+            if cached_result is not None:  # pragma: no cover
+                return cached_result
+        cache.mark_processing(path, config)
     try:
         if not path.exists():
@@ -360,7 +366,7 @@ def extract_file_sync(
             result = extractor.extract_path_sync(Path(file_path))
         else:
             result = ExtractionResult(
-                content=Path(file_path).read_text(),
+                content=Path(file_path).read_text(encoding="utf-8"),
                 chunks=[],
                 mime_type=mime_type,
                 metadata={},
@@ -368,11 +374,13 @@ def extract_file_sync(
         result = _validate_and_post_process_sync(result=result, config=config, file_path=path)
-        cache.set(path, config, result)
+        if config.use_cache:
+            cache.set(path, config, result)
         return result
     finally:
-        cache.mark_complete(path, config)
+        if config.use_cache:
+            cache.mark_complete(path, config)
 def batch_extract_file_sync(
@@ -404,7 +412,7 @@ def batch_extract_file_sync(
                 content=f"Error: {type(e).__name__}: {e!s}",
                 mime_type="text/plain",
                 metadata={  # type: ignore[typeddict-unknown-key]
-                    "error": True,
+                    "error": f"{type(e).__name__}: {e!s}",
                     "error_context": create_error_context(
                         operation="batch_extract_file_sync",
                         file_path=file_path,
@@ -455,7 +463,7 @@ def batch_extract_bytes_sync(
                 content=f"Error: {type(e).__name__}: {e!s}",
                 mime_type="text/plain",
                 metadata={  # type: ignore[typeddict-unknown-key]
-                    "error": True,
+                    "error": f"{type(e).__name__}: {e!s}",
                     "error_context": create_error_context(
                         operation="batch_extract_bytes_sync",
                         error=e,
@@ -469,7 +477,6 @@ def batch_extract_bytes_sync(
             return (index, error_result)
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        # Avoid creating intermediate list, use enumerate directly
         future_to_index = {executor.submit(extract_single, (i, content)): i for i, content in enumerate(contents)}
         results: list[ExtractionResult] = [None] * len(contents)  # type: ignore[list-item]

{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kreuzberg
-Version: 3.11.4
+Version: 3.13.0
 Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
 Project-URL: documentation, https://kreuzberg.dev
 Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -31,15 +31,16 @@ Requires-Python: >=3.10
 Requires-Dist: anyio>=4.10.0
 Requires-Dist: chardetng-py>=0.3.5
 Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
-Requires-Dist: html-to-markdown[lxml]>=1.9.0
+Requires-Dist: html-to-markdown[lxml]>=1.9.1
 Requires-Dist: mcp>=1.13.0
 Requires-Dist: msgspec>=0.18.0
 Requires-Dist: playa-pdf>=0.7.0
+Requires-Dist: polars>=1.33.0
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: pypdfium2==4.30.0
 Requires-Dist: python-calamine>=0.3.2
 Requires-Dist: python-pptx>=1.0.2
-Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
+Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
 Provides-Extra: additional-extensions
 Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
 Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
@@ -54,7 +55,6 @@ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all
 Requires-Dist: mailparse>=1.0.15; extra == 'all'
 Requires-Dist: paddleocr>=3.2.0; extra == 'all'
 Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
-Requires-Dist: pandas>=2.3.2; extra == 'all'
 Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
 Requires-Dist: rich>=14.1.0; extra == 'all'
 Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
@@ -73,7 +73,6 @@ Provides-Extra: crypto
 Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
 Provides-Extra: document-classification
 Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
-Requires-Dist: pandas>=2.3.2; extra == 'document-classification'
 Provides-Extra: easyocr
 Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
 Provides-Extra: entity-extraction
@@ -109,8 +108,7 @@ Description-Content-Type: text/markdown
 - **Text Extraction**: High-fidelity text extraction preserving document structure and formatting
 - **Metadata Extraction**: Comprehensive metadata including author, creation date, language, and document properties
 - **Format Support**: 18 document types including PDF, Microsoft Office, images, HTML, and structured data formats
-- **OCR Integration**: Multiple OCR engines (Tesseract, EasyOCR, PaddleOCR) with automatic fallback
-- **Table Detection**: Structured table extraction with cell-level precision via GMFT integration
+- **OCR Integration**: Tesseract OCR with markdown output (default) and table extraction from scanned documents
 - **Document Classification**: Automatic document type detection (contracts, forms, invoices, receipts, reports)
 ### Technical Architecture
@@ -138,8 +136,8 @@ Kreuzberg leverages established open source technologies:
 # Extract text from any file to text format
 uvx kreuzberg extract document.pdf > output.txt
-# With all features (OCR, table extraction, etc.)
-uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
+# With all features (chunking, language detection, etc.)
+uvx kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
 # Extract with rich metadata
 uvx kreuzberg extract report.pdf --show-metadata --output-format json
@@ -179,10 +177,15 @@ print(f"Keywords: {result.metadata.keywords}")
 ### Docker
+Two optimized images available:
 ```bash
-# Run the REST API
+# Base image (API + CLI + multilingual OCR)
 docker run -p 8000:8000 goldziher/kreuzberg
+# Core image (+ chunking + crypto + document classification + language detection)
+docker run -p 8000:8000 goldziher/kreuzberg-core:latest
 # Extract via API
 curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
 ```
@@ -196,7 +199,7 @@ curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
 **Add to Claude Desktop with one command:**
 ```bash
-claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
+claude mcp add kreuzberg uvx kreuzberg-mcp
 ```
 **Or configure manually in `claude_desktop_config.json`:**
@@ -206,7 +209,7 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
   "mcpServers": {
     "kreuzberg": {
       "command": "uvx",
-      "args": ["--from", "kreuzberg[all]", "kreuzberg-mcp"]
+      "args": ["kreuzberg-mcp"]
     }
   }
 }
@@ -215,8 +218,8 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
 **MCP capabilities:**
 - Extract text from PDFs, images, Office docs, and more
-- Full OCR support with multiple engines
-- Table extraction and metadata parsing
+- Multilingual OCR support with Tesseract
+- Metadata parsing and language detection
 📖 **[MCP Documentation](https://kreuzberg.dev/user-guide/mcp-server/)**

kreuzberg-3.13.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,56 @@
+kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
+kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
+kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
+kreuzberg/_config.py,sha256=dSTumnpleMeUjUabWgAH7WlhTkdNG3eeMv8FSFmUaEI,15776
+kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
+kreuzberg/_document_classification.py,sha256=NZ-6tQtVa1OgigC7xf30hAsnL5_gi9ak9X2XYdsCfTI,6361
+kreuzberg/_entity_extraction.py,sha256=QFIPQ_fovEnEezpS6W4pwpjTA2PqS7TUCD9AKf8sAyc,4666
+kreuzberg/_gmft.py,sha256=60WpPTf7jocU-kmkBe-pBytl7l58aQzd-Aw2_Hlioug,21481
+kreuzberg/_language_detection.py,sha256=yLUliJOUyofVma_q6FwzG9Ck4-XX3AEjxleTHrqi8R4,2445
+kreuzberg/_mime_types.py,sha256=fwtPKtp2XhCLT686qF26PBMeOqcVJroKPwkp7JgaM0E,8462
+kreuzberg/_playa.py,sha256=1viLRqgcDWvaPo5ZsDPO2gqHFSBApOYortTV_SPVK9k,12190
+kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
+kreuzberg/_types.py,sha256=WFUFY1S7SL7kTfHCX-zGASLYT94FxLD71C9vGUzFOiA,38922
+kreuzberg/cli.py,sha256=MLeWoMcLoN6WnkbyRbOY-2dqp-vNZf7Nb-K_R5F5CoU,12730
+kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
+kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
+kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
+kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kreuzberg/_extractors/_base.py,sha256=EZTEJzwJxwu_yYFQ5QlZVNQMPCcli7yyUB4T5mFotCY,4209
+kreuzberg/_extractors/_email.py,sha256=mVi_VDmiFhe6NgiWxJDYt4DQiP6jVs5dP8BsPClm3WQ,6108
+kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
+kreuzberg/_extractors/_image.py,sha256=UZEOmKNAS4KjaX38iYq2Ux6Mta3juCF1MzWNeBxpPE8,3414
+kreuzberg/_extractors/_pandoc.py,sha256=zumwImIXwD3ziPhYxt0EQct5sSMy5lQiY6KnPSDxBTU,24183
+kreuzberg/_extractors/_pdf.py,sha256=766O7rXAeAJ42vPpWbGpW_WgHXm48eWwX09l3aqjKeM,18064
+kreuzberg/_extractors/_presentation.py,sha256=BJdEM9jsuAd0vb-PIRwNMcRj4xVjItb5kpOpnjsCBi0,10175
+kreuzberg/_extractors/_spread_sheet.py,sha256=wqAV-Stqfd4hXs5ock-chqBEdzv4voSgT1uFUO1cIU0,12075
+kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
+kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
+kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
+kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
+kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
+kreuzberg/_ocr/_easyocr.py,sha256=CtiHGx_BmuUwZhC7bScYF9mwnAxRrLWJ-X70fuwFTjk,14079
+kreuzberg/_ocr/_paddleocr.py,sha256=wCuIQ_yxPWE9hukiehYNRdt00Rb2h6pWdfqPS8hI2s0,14297
+kreuzberg/_ocr/_table_extractor.py,sha256=MeQLQn_bRco5OAcUoy613ZbZLCDBRJY8uHH_bUBSP8I,7613
+kreuzberg/_ocr/_tesseract.py,sha256=i_UTjOmrFxZbtmXxrQIsE78wtZLTyZph0i0jDQc4EMA,56916
+kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kreuzberg/_utils/_cache.py,sha256=fDqFp_54-Kyn3_4VkXkhovvNIB2osKqXlS13MlORrU8,14539
+kreuzberg/_utils/_device.py,sha256=35xQvrLSPISJlWicQGknoBjkwdalwVxiJbzyxwuwOVo,9747
+kreuzberg/_utils/_document_cache.py,sha256=CpCdJVd8SYLjfwm0ozSM8mx5x8i9vVDet3BlEUpzuZY,6920
+kreuzberg/_utils/_errors.py,sha256=ctD-s1q7vbEgqHQ3OVJiEOODDLTd2LvrM3z6o37zrGI,6395
+kreuzberg/_utils/_pdf_lock.py,sha256=mHB1A4Fo_nSfgdqUNEWODH9b5tNFqpEHcNE6rT41dGE,1886
+kreuzberg/_utils/_process_pool.py,sha256=ebuMPmHXPkWaLWjgAkeaONvAZo974PhfENN8pnPTCco,8415
+kreuzberg/_utils/_quality.py,sha256=m3SIXGDY9pfRmh3XeKdZWT1vBz7issH0SfKsutEuRxw,5833
+kreuzberg/_utils/_ref.py,sha256=uP_S3x0AQH2Nyjo1tYEj7N_u9hGzYVewdjch6a8Fv5I,1458
+kreuzberg/_utils/_serialization.py,sha256=duKP5OuBvi-m6ljQOhoyuJU7sl2WPnov8yJDpYuDArw,2052
+kreuzberg/_utils/_string.py,sha256=yrcwHHl23FxWrNoFXkmR3icgivfvbLRvkqQek8F3qqI,5020
+kreuzberg/_utils/_sync.py,sha256=mc-K2y_sc6mG-HOswlHTXAWaEzgisEERvq9PPw2dAw4,4869
+kreuzberg/_utils/_table.py,sha256=dYM_dWNHRCXcWOhSQBnahOJaBXyuQFyYX9arHrH4TF8,7555
+kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
+kreuzberg-3.13.0.dist-info/METADATA,sha256=896BWDLD6ApGiOQFKXMqQezC4qgKRUxjMqbZVWxBoJ0,12098
+kreuzberg-3.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kreuzberg-3.13.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
+kreuzberg-3.13.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
+kreuzberg-3.13.0.dist-info/RECORD,,

kreuzberg-3.11.4.dist-info/RECORD DELETED Viewed

@@ -1,54 +0,0 @@
-kreuzberg/__init__.py,sha256=0OJ_jNKbS6GxzWC5-EfRCiE80as_ya0-wwyNsTYbxzY,1721
-kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
-kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
-kreuzberg/_config.py,sha256=Au521UiR7vcQs_8_hhoWIfmDDMJIrDM3XZUB_qHfCmo,14035
-kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
-kreuzberg/_document_classification.py,sha256=qFGmwvUMhnNAvNNJO7E-huPx-Ps-_DWxdNxsozIzgaw,6870
-kreuzberg/_entity_extraction.py,sha256=Oa1T-9mptimpOHtcda-GtrVYH9PFy7DSJj3thJZUD7k,7902
-kreuzberg/_gmft.py,sha256=6P4gSSmU39puaYAKmdGr9ALf0USYTwRDuvvhG1LmI24,26441
-kreuzberg/_language_detection.py,sha256=_Ng2aHgPxOHFgd507gVNiIGVmnxxbpgYwsO0bD0yTzg,3315
-kreuzberg/_mime_types.py,sha256=2warRVqfBUNIg8JBg8yP4pRqaMPvwINosHMkJwtH_Fc,8488
-kreuzberg/_playa.py,sha256=_IPrUSWwSfDQlWXOpKlauV0D9MhGrujGP5kmQ0U3L0g,12188
-kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
-kreuzberg/_types.py,sha256=bMaU6VuoqwOpW6ufshA-DWpNw6t9EokjEDEfFsznvdo,15389
-kreuzberg/cli.py,sha256=nG1CD_h50EWLmDbrb0_DffRl25uTCKeCS6_gRVpjEdU,12578
-kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
-kreuzberg/extraction.py,sha256=Kt1mOxdlOb35yVOdpdhiRPuTgA9BW_TTG9qwCkSxSkc,17332
-kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kreuzberg/_api/main.py,sha256=8VwxRlIXwnPs7ZYm0saUZsNOjevEAWJQpNreG-X7ZpE,3273
-kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kreuzberg/_extractors/_base.py,sha256=H_nwynBX3fozncVjV13c329x5eCLl5r7nyVTLQyDAzI,4396
-kreuzberg/_extractors/_email.py,sha256=Jpr4NFef640uVgNFkR1or-omy8RVt-NOHUYgWRDjyBo,6753
-kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
-kreuzberg/_extractors/_image.py,sha256=Iz1JpvGqcYyh9g4zO_bMZG3E9S39KNHFu8PrXDRXeOk,4513
-kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
-kreuzberg/_extractors/_pdf.py,sha256=OflyvwEkuFLmw8E3si35MCGH31fvd5o50VdMmu5QRVs,19884
-kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
-kreuzberg/_extractors/_spread_sheet.py,sha256=iagiyJsnl-89OP1eqmEv8jWl7gZBJm2x0YOyqBgLasA,13733
-kreuzberg/_extractors/_structured.py,sha256=PbNaXd-_PUPsE0yZkISod_vLBokbWdVTKEPpEmqaEMM,5787
-kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
-kreuzberg/_mcp/server.py,sha256=Dxed80MqZsYCFyYo0QdArpKE4H8DhpKY34fijdzV5uw,8731
-kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
-kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
-kreuzberg/_ocr/_easyocr.py,sha256=eU4MA_B_-cvq_IhpCeYUruL_kqcfm8maNZKP7zvVQHI,17512
-kreuzberg/_ocr/_paddleocr.py,sha256=I7ns6L56a2Ol460Bge6e0hpc2AkkwDepLcpCsABj5Dc,17609
-kreuzberg/_ocr/_tesseract.py,sha256=teLMH1pBhpcmEXDcyZlv56hYINLGMuaKZ0CQtcu_czQ,31510
-kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kreuzberg/_utils/_cache.py,sha256=hYd_a5Ni5VJBE1XU_eN9gvQ5gg0FRsdbRgmJe-OIJHM,15253
-kreuzberg/_utils/_device.py,sha256=JI9p9TGSfQHEi2SL-ovOXMr9RUnVq-RrEly89OvmQ5w,10485
-kreuzberg/_utils/_document_cache.py,sha256=ka90JIT-FXUMOv8z2u3fztQgZZb2XQDHTMnBi32mySA,7005
-kreuzberg/_utils/_errors.py,sha256=UsktQ_p7eOj9crPsFDg8HgRSE5-IpuFC7y1e6dDI_fY,6503
-kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
-kreuzberg/_utils/_process_pool.py,sha256=4BqhmRspwMyPT2EBfTu_rrn7v722wlMLD8qlYvYsc00,8621
-kreuzberg/_utils/_quality.py,sha256=-nKzj5n7yJDYrvl556oq2T5S5oKMEOrjpcRMlZ00Jqo,7668
-kreuzberg/_utils/_serialization.py,sha256=cqqxqN2cmtndBhIr4v2wqiMwnNadnKhvuN7EUj3i18M,2290
-kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6800
-kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
-kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
-kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
-kreuzberg-3.11.4.dist-info/METADATA,sha256=l3d8PyVfX_aEgXl5ykkuRHJi-8Qzhu4_KcHDYOK2RYg,12136
-kreuzberg-3.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-kreuzberg-3.11.4.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
-kreuzberg-3.11.4.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
-kreuzberg-3.11.4.dist-info/RECORD,,

{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

kreuzberg 3.11.4__py3-none-any.whl → 3.13.0__py3-none-any.whl

kreuzberg-3.11.4-py3-none-any.whl → kreuzberg-3.13.0-py3-none-any.whl