kreuzberg 3.6.2__tar.gz → 3.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/PKG-INFO +54 -2
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/README.md +52 -1
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/index.md +1 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/index.md +1 -0
- kreuzberg-3.7.0/docs/user-guide/mcp-server.md +571 -0
- kreuzberg-3.7.0/kreuzberg/_mcp/__init__.py +5 -0
- kreuzberg-3.7.0/kreuzberg/_mcp/server.py +227 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/mkdocs.yaml +1 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/pyproject.toml +3 -1
- kreuzberg-3.7.0/tests/mcp_server_test.py +374 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/uv.lock +170 -1
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.commitlintrc +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.docker/Dockerfile +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.docker/README.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.dockerignore +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/dependabot.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/ci.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/docs.yml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/pr-title.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/publish-docker.yml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/release.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.gitignore +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.gitmodules +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.markdownlint.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.pre-commit-config.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/LICENSE +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/ai-rulez.yaml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/README.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/benchmark_baseline.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/end_to_end_benchmark.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/final_benchmark.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/pyproject.toml +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/baseline_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/comprehensive_caching_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/final_benchmark_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/mime_caching_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/msgspec_caching_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/ocr_caching_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/serialization_benchmark_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/statistical_benchmark_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/table_caching_results.json +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/serialization_benchmark.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/statistical_benchmark.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/custom-extractors.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/custom-hooks.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/error-handling.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/index.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/performance.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/exceptions.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/extraction-functions.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/extractor-registry.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/index.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/ocr-configuration.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/types.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/assets/favicon.png +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/assets/logo.png +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/changelog.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/cli.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/contributing.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/css/extra.css +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/examples/extraction-examples.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/examples/index.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/index.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/installation.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/quick-start.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/api-server.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/basic-usage.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/chunking.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/docker.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/extraction-configuration.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/metadata-extraction.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/ocr-backends.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/ocr-configuration.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/supported-formats.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/__main__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_api/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_api/main.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_chunker.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_cli_config.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_constants.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_entity_extraction.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_base.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_html.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_image.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_pandoc.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_pdf.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_presentation.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_spread_sheet.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_gmft.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_language_detection.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_mime_types.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/gmft_isolated.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/process_manager.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_easyocr.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_paddleocr.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_tesseract.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/tesseract_pool.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_base.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_easyocr.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_paddleocr.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_tesseract.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_playa.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_registry.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_types.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_cache.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_device.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_document_cache.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_errors.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_pdf_lock.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_process_pool.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_serialization.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_string.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_sync.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_tmp.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/cli.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/exceptions.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/extraction.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/py.typed +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/api/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/api/main_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/chunker_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/cli_integration_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/cli_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/conftest.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/entity_extraction_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/exceptions_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extraction_batch_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extraction_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/html_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/image_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pandoc_metadata_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pandoc_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pdf_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/presentation_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/spreed_sheet_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/gmft_extended_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/gmft_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/hooks_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/language_detection_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/mime_types_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/gmft_integration_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/process_manager_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/sync_tesseract_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/tesseract_pool_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/base_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/device_integration_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/easyocr_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/init_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/paddleocr_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/tesseract_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/playa_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/registry_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/document.docx +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/excel.xlsx +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/french-text.txt +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/german-text.txt +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/html.html +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/markdown.md +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/non-ascii-text.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/non-searchable.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/ocr-image.jpg +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/sample-contract.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/scanned.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/searchable.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/spanish-text.txt +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/test-article.pdf +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/types_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/__init__.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/cache_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/device_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/errors_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/pdf_lock_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/process_pool_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/serialization_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/string_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/sync_test.py +0 -0
- {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/tmp_test.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.6.2
|
3
|
+
Version: 3.7.0
|
4
4
|
Summary: A text extraction library supporting PDFs, images, office documents and more
|
5
5
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
6
6
|
Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
|
@@ -26,6 +26,7 @@ Requires-Dist: anyio>=4.9.0
|
|
26
26
|
Requires-Dist: charset-normalizer>=3.4.2
|
27
27
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
28
28
|
Requires-Dist: html-to-markdown[lxml]>=1.6.0
|
29
|
+
Requires-Dist: mcp>=1.11.0
|
29
30
|
Requires-Dist: msgspec>=0.18.0
|
30
31
|
Requires-Dist: playa-pdf>=0.6.1
|
31
32
|
Requires-Dist: psutil>=7.0.0
|
@@ -90,7 +91,8 @@ Description-Content-Type: text/markdown
|
|
90
91
|
- **🏠 Local Processing**: No cloud dependencies or external API calls
|
91
92
|
- **📦 Rich Format Support**: PDFs, images, Office docs, HTML, and more
|
92
93
|
- **🔍 Multiple OCR Engines**: Tesseract, EasyOCR, and PaddleOCR support
|
93
|
-
- **🐳 Production Ready**: CLI, REST API, and Docker images included
|
94
|
+
- **🤖 AI Integration**: Native MCP server for Claude and other AI tools
|
95
|
+
- **🐳 Production Ready**: CLI, REST API, MCP server, and Docker images included
|
94
96
|
|
95
97
|
## Quick Start
|
96
98
|
|
@@ -136,6 +138,55 @@ asyncio.run(main())
|
|
136
138
|
|
137
139
|
## Deployment Options
|
138
140
|
|
141
|
+
### 🤖 MCP Server (AI Integration)
|
142
|
+
|
143
|
+
**Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
|
144
|
+
|
145
|
+
```bash
|
146
|
+
# Install and run MCP server with all features (recommended)
|
147
|
+
pip install "kreuzberg[all]"
|
148
|
+
kreuzberg-mcp
|
149
|
+
|
150
|
+
# Or with uvx (recommended for Claude Desktop)
|
151
|
+
uvx --with "kreuzberg[all]" kreuzberg-mcp
|
152
|
+
|
153
|
+
# Basic installation (core features only)
|
154
|
+
pip install kreuzberg
|
155
|
+
kreuzberg-mcp
|
156
|
+
```
|
157
|
+
|
158
|
+
**Configure in Claude Desktop (`claude_desktop_config.json`):**
|
159
|
+
|
160
|
+
```json
|
161
|
+
{
|
162
|
+
"mcpServers": {
|
163
|
+
"kreuzberg": {
|
164
|
+
"command": "uvx",
|
165
|
+
"args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
|
166
|
+
}
|
167
|
+
}
|
168
|
+
}
|
169
|
+
```
|
170
|
+
|
171
|
+
**Basic configuration (core features only):**
|
172
|
+
|
173
|
+
```json
|
174
|
+
{
|
175
|
+
"mcpServers": {
|
176
|
+
"kreuzberg": {
|
177
|
+
"command": "uvx",
|
178
|
+
"args": ["kreuzberg-mcp"]
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
```
|
183
|
+
|
184
|
+
**Available MCP capabilities:**
|
185
|
+
|
186
|
+
- **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
|
187
|
+
- **Resources**: Configuration, supported formats, OCR backends
|
188
|
+
- **Prompts**: Extract-and-summarize, structured analysis workflows
|
189
|
+
|
139
190
|
### 🐳 Docker (Recommended)
|
140
191
|
|
141
192
|
```bash
|
@@ -221,6 +272,7 @@ kreuzberg extract *.pdf --output-dir ./extracted/
|
|
221
272
|
|
222
273
|
## Advanced Features
|
223
274
|
|
275
|
+
- **🤖 MCP Server**: Native integration with Claude Desktop and AI tools
|
224
276
|
- **📊 Table Extraction**: Extract tables from PDFs with GMFT
|
225
277
|
- **🧩 Content Chunking**: Split documents for RAG applications
|
226
278
|
- **🎯 Custom Extractors**: Extend with your own document handlers
|
@@ -18,7 +18,8 @@
|
|
18
18
|
- **🏠 Local Processing**: No cloud dependencies or external API calls
|
19
19
|
- **📦 Rich Format Support**: PDFs, images, Office docs, HTML, and more
|
20
20
|
- **🔍 Multiple OCR Engines**: Tesseract, EasyOCR, and PaddleOCR support
|
21
|
-
- **🐳 Production Ready**: CLI, REST API, and Docker images included
|
21
|
+
- **🤖 AI Integration**: Native MCP server for Claude and other AI tools
|
22
|
+
- **🐳 Production Ready**: CLI, REST API, MCP server, and Docker images included
|
22
23
|
|
23
24
|
## Quick Start
|
24
25
|
|
@@ -64,6 +65,55 @@ asyncio.run(main())
|
|
64
65
|
|
65
66
|
## Deployment Options
|
66
67
|
|
68
|
+
### 🤖 MCP Server (AI Integration)
|
69
|
+
|
70
|
+
**Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
|
71
|
+
|
72
|
+
```bash
|
73
|
+
# Install and run MCP server with all features (recommended)
|
74
|
+
pip install "kreuzberg[all]"
|
75
|
+
kreuzberg-mcp
|
76
|
+
|
77
|
+
# Or with uvx (recommended for Claude Desktop)
|
78
|
+
uvx --with "kreuzberg[all]" kreuzberg-mcp
|
79
|
+
|
80
|
+
# Basic installation (core features only)
|
81
|
+
pip install kreuzberg
|
82
|
+
kreuzberg-mcp
|
83
|
+
```
|
84
|
+
|
85
|
+
**Configure in Claude Desktop (`claude_desktop_config.json`):**
|
86
|
+
|
87
|
+
```json
|
88
|
+
{
|
89
|
+
"mcpServers": {
|
90
|
+
"kreuzberg": {
|
91
|
+
"command": "uvx",
|
92
|
+
"args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
|
93
|
+
}
|
94
|
+
}
|
95
|
+
}
|
96
|
+
```
|
97
|
+
|
98
|
+
**Basic configuration (core features only):**
|
99
|
+
|
100
|
+
```json
|
101
|
+
{
|
102
|
+
"mcpServers": {
|
103
|
+
"kreuzberg": {
|
104
|
+
"command": "uvx",
|
105
|
+
"args": ["kreuzberg-mcp"]
|
106
|
+
}
|
107
|
+
}
|
108
|
+
}
|
109
|
+
```
|
110
|
+
|
111
|
+
**Available MCP capabilities:**
|
112
|
+
|
113
|
+
- **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
|
114
|
+
- **Resources**: Configuration, supported formats, OCR backends
|
115
|
+
- **Prompts**: Extract-and-summarize, structured analysis workflows
|
116
|
+
|
67
117
|
### 🐳 Docker (Recommended)
|
68
118
|
|
69
119
|
```bash
|
@@ -149,6 +199,7 @@ kreuzberg extract *.pdf --output-dir ./extracted/
|
|
149
199
|
|
150
200
|
## Advanced Features
|
151
201
|
|
202
|
+
- **🤖 MCP Server**: Native integration with Claude Desktop and AI tools
|
152
203
|
- **📊 Table Extraction**: Extract tables from PDFs with GMFT
|
153
204
|
- **🧩 Content Chunking**: Split documents for RAG applications
|
154
205
|
- **🎯 Custom Extractors**: Extend with your own document handlers
|
@@ -6,6 +6,7 @@ Kreuzberg is a Python library for text extraction from documents. It provides a
|
|
6
6
|
|
7
7
|
- **Simple and Hassle-Free**: Clean API that just works, without complex configuration
|
8
8
|
- **Local Processing**: No external API calls or cloud dependencies required
|
9
|
+
- **AI Integration**: Native MCP server for Claude Desktop and other AI tools
|
9
10
|
- **Resource Efficient**: Lightweight processing without GPU requirements
|
10
11
|
- **Small Package Size**: Has few curated dependencies and a minimal footprint
|
11
12
|
- **Format Support**: Comprehensive support for documents, images, and text formats
|
@@ -11,6 +11,7 @@ This guide covers the main concepts and usage patterns of Kreuzberg.
|
|
11
11
|
- [OCR Configuration](ocr-configuration.md) - Configure OCR settings ([API](../api-reference/ocr-configuration.md))
|
12
12
|
- [OCR Backends](ocr-backends.md) - Choose and configure different OCR engines
|
13
13
|
- [Supported Formats](supported-formats.md) - All supported document formats
|
14
|
+
- [MCP Server](mcp-server.md) - Model Context Protocol server for AI integration
|
14
15
|
- [API Server](api-server.md) - REST API for document extraction
|
15
16
|
- [Docker](docker.md) - Using Kreuzberg with Docker
|
16
17
|
|