kreuzberg 3.13.2__tar.gz → 3.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/ci.yaml +183 -20
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/docker-e2e-tests.yml +2 -3
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/docs.yml +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/publish-docker.yml +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/release.yaml +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.gitignore +3 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.pre-commit-config.yaml +2 -7
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/PKG-INFO +10 -10
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/ai-rulez.yaml +236 -176
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/examples/extraction-examples.md +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/api-server.md +59 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/extraction-configuration.md +75 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/ocr-configuration.md +65 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_api/main.py +117 -15
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_config.py +3 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_image.py +20 -2
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_pdf.py +21 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_spread_sheet.py +16 -2
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_gmft.py +79 -33
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mcp/server.py +0 -76
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_base.py +1 -2
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_paddleocr.py +39 -13
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_tesseract.py +16 -6
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_registry.py +26 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_types.py +64 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_cache.py +34 -12
- kreuzberg-3.14.0/kreuzberg/_utils/_image_preprocessing.py +346 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_ocr_cache.py +2 -5
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_process_pool.py +3 -3
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_table.py +4 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/cli.py +19 -2
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/extraction.py +4 -4
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/pyproject.toml +15 -15
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/main_test.py +31 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/runtime_config_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/conftest.py +99 -0
- kreuzberg-3.14.0/tests/core/dpi_configuration_test.py +353 -0
- kreuzberg-3.14.0/tests/core/html_to_markdown_config_test.py +0 -0
- kreuzberg-3.14.0/tests/core/mime_types_test.py +0 -0
- kreuzberg-3.14.0/tests/core/registry_test.py +0 -0
- kreuzberg-3.14.0/tests/core/types_test.py +0 -0
- kreuzberg-3.14.0/tests/e2e/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/e2e/docker_e2e_test.py +4 -4
- kreuzberg-3.14.0/tests/extractors/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/html_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/image_test.py +7 -3
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pandoc_test.py +1 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pdf_test.py +7 -22
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/presentation_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/spreed_sheet_test.py +4 -0
- kreuzberg-3.14.0/tests/features/__init__.py +0 -0
- kreuzberg-3.14.0/tests/features/chunker_test.py +0 -0
- kreuzberg-3.14.0/tests/features/document_classification_test.py +0 -0
- kreuzberg-3.14.0/tests/features/entity_extraction_test.py +0 -0
- kreuzberg-3.14.0/tests/features/gmft_test.py +0 -0
- kreuzberg-3.14.0/tests/features/hooks_test.py +0 -0
- kreuzberg-3.14.0/tests/features/language_detection_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/__init__.py +0 -0
- kreuzberg-3.14.0/tests/integration/api/__init__.py +0 -0
- kreuzberg-3.14.0/tests/integration/api/large_file_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/api/mounted_config_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/dpi_integration_test.py +244 -0
- kreuzberg-3.14.0/tests/integration/multiprocessing/__init__.py +0 -0
- kreuzberg-3.14.0/tests/integration/multiprocessing/gmft_integration_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/ocr/__init__.py +0 -0
- kreuzberg-3.14.0/tests/integration/ocr/device_integration_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/ocr/tesseract_sync_formats_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/ocr/tesseract_tsv_integration_test.py +0 -0
- kreuzberg-3.14.0/tests/integration/regression_test.py +134 -0
- kreuzberg-3.14.0/tests/interfaces/__init__.py +0 -0
- kreuzberg-3.14.0/tests/mcp/__init__.py +0 -0
- kreuzberg-3.14.0/tests/mcp/mcp_server_test.py +0 -0
- kreuzberg-3.14.0/tests/multiprocessing/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/gmft_isolated_test.py +54 -58
- kreuzberg-3.14.0/tests/ocr/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/easyocr_test.py +1 -10
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/paddleocr_test.py +7 -18
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/tesseract_test.py +1 -3
- kreuzberg-3.14.0/tests/test_source_files/Xerox_AltaLink_series_mfp_sag_en-US 2.pdf +0 -0
- kreuzberg-3.14.0/tests/test_source_files/google-doc-document.pdf +0 -0
- kreuzberg-3.14.0/tests/test_source_files/sharable-web-guide.pdf +0 -0
- kreuzberg-3.14.0/tests/test_source_files/test-excel.xls +0 -0
- kreuzberg-3.14.0/tests/utils/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/device_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/ocr_cache_test.py +7 -43
- kreuzberg-3.14.0/tests/utils/playa_helpers_test.py +0 -0
- kreuzberg-3.14.0/tests/utils/playa_test.py +0 -0
- kreuzberg-3.14.0/tests/utils/quality_test.py +121 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/serialization_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/table_test.py +26 -26
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/tmp_test.py +1 -1
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/uv.lock +681 -596
- kreuzberg-3.13.2/tests/chunker_test.py +0 -102
- kreuzberg-3.13.2/tests/cli_command_test.py +0 -481
- kreuzberg-3.13.2/tests/cli_integration_test.py +0 -858
- kreuzberg-3.13.2/tests/cli_test.py +0 -324
- kreuzberg-3.13.2/tests/config_test.py +0 -1540
- kreuzberg-3.13.2/tests/document_classification_test.py +0 -837
- kreuzberg-3.13.2/tests/entity_extraction_test.py +0 -588
- kreuzberg-3.13.2/tests/exceptions_test.py +0 -91
- kreuzberg-3.13.2/tests/extraction_batch_test.py +0 -253
- kreuzberg-3.13.2/tests/extraction_test.py +0 -752
- kreuzberg-3.13.2/tests/gmft_extended_test.py +0 -137
- kreuzberg-3.13.2/tests/gmft_test.py +0 -788
- kreuzberg-3.13.2/tests/hooks_test.py +0 -205
- kreuzberg-3.13.2/tests/html_to_markdown_config_test.py +0 -217
- kreuzberg-3.13.2/tests/language_detection_test.py +0 -152
- kreuzberg-3.13.2/tests/mcp_server_test.py +0 -757
- kreuzberg-3.13.2/tests/mime_types_test.py +0 -195
- kreuzberg-3.13.2/tests/multiprocessing/gmft_integration_test.py +0 -98
- kreuzberg-3.13.2/tests/ocr/device_integration_test.py +0 -268
- kreuzberg-3.13.2/tests/ocr/tesseract_tsv_integration_test.py +0 -273
- kreuzberg-3.13.2/tests/playa_helpers_test.py +0 -473
- kreuzberg-3.13.2/tests/playa_test.py +0 -111
- kreuzberg-3.13.2/tests/registry_test.py +0 -190
- kreuzberg-3.13.2/tests/tesseract_sync_formats_test.py +0 -169
- kreuzberg-3.13.2/tests/types_test.py +0 -374
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.commitlintrc +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.deepsource.toml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.docker/Dockerfile +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.docker/README.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.dockerignore +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/dependabot.yaml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/pr-title.yaml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/test-docker-builds.yml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.markdownlint.yaml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/LICENSE +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/README.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/Taskfile.yml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/README.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/pyproject.toml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/__main__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/benchmarks.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/cli.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/models.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/profiler.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/runner.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-compose.example.yml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-logs/docker-info.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-logs/docker-version.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/custom-extractors.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/custom-hooks.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/error-handling.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/performance.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/exceptions.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/extraction-functions.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/extractor-registry.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/ocr-configuration.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/types.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/assets/favicon.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/assets/logo.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/cli.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/contributing.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/css/extra.css +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/examples/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/installation.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/quick-start.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/basic-usage.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/chunking.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/docker.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/document-classification.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/index.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/mcp-server.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/metadata-extraction.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/ocr-backends.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/supported-formats.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/__main__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_api/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_chunker.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_constants.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_document_classification.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_entity_extraction.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_base.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_email.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_html.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_pandoc.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_presentation.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_structured.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_language_detection.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mcp/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mime_types.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_easyocr.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_table_extractor.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_playa.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_device.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_document_cache.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_errors.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_pdf_lock.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_quality.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_ref.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_serialization.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_string.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_sync.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_tmp.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/exceptions.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/py.typed +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/mkdocs.yaml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/output.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/baseline.json +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/serialization.json +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/statistical.json +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/test_report.json +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/__init__.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/conftest.py +0 -0
- {kreuzberg-3.13.2/tests/e2e → kreuzberg-3.14.0/tests/core}/__init__.py +0 -0
- /kreuzberg-3.13.2/tests/extractors/__init__.py → /kreuzberg-3.14.0/tests/core/config_test.py +0 -0
- /kreuzberg-3.13.2/tests/multiprocessing/__init__.py → /kreuzberg-3.14.0/tests/core/exceptions_test.py +0 -0
- /kreuzberg-3.13.2/tests/ocr/__init__.py → /kreuzberg-3.14.0/tests/core/extraction_batch_test.py +0 -0
- /kreuzberg-3.13.2/tests/utils/__init__.py → /kreuzberg-3.14.0/tests/core/extraction_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/email_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pandoc_metadata_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/structured_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/process_manager_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/tesseract_pool_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/base_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/init_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/tesseract_tsv_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/contract.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/contract_test.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/document.docx +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/email/sample-email.eml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/excel.xlsx +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/flower-no-text.jpg +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/form_test.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/french-text.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/german-text.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/html.html +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/images/test_hello_world.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/invoice_image.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/invoice_test.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/json/sample-document.json +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/markdown.md +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/non-ascii-text.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/non-searchable.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/ocr-image.jpg +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/receipt_test.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/report_test.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/sample-contract.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/scanned.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/searchable.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/spanish-text.txt +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/borderless_table.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/complex_document.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/simple_table.png +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/test-article.pdf +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/yaml/sample-config.yaml +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/cache_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/errors_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/pdf_lock_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/process_pool_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/ref_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/string_test.py +0 -0
- {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/sync_test.py +0 -0
@@ -7,7 +7,6 @@ on:
|
|
7
7
|
push:
|
8
8
|
branches:
|
9
9
|
- main
|
10
|
-
- feat/smart-multiprocessing
|
11
10
|
|
12
11
|
jobs:
|
13
12
|
validate:
|
@@ -23,7 +22,7 @@ jobs:
|
|
23
22
|
enable-cache: true
|
24
23
|
|
25
24
|
- name: Set up Python
|
26
|
-
uses: actions/setup-python@
|
25
|
+
uses: actions/setup-python@v6
|
27
26
|
with:
|
28
27
|
python-version-file: "pyproject.toml"
|
29
28
|
|
@@ -38,7 +37,7 @@ jobs:
|
|
38
37
|
echo "Removing existing .venv directory on Windows"
|
39
38
|
rm -rf .venv
|
40
39
|
fi
|
41
|
-
uv sync --all-
|
40
|
+
uv sync --all-extras --dev
|
42
41
|
shell: bash
|
43
42
|
|
44
43
|
- name: Load Cached Pre-Commit Dependencies
|
@@ -53,8 +52,9 @@ jobs:
|
|
53
52
|
|
54
53
|
coverage:
|
55
54
|
needs: validate
|
55
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
56
56
|
runs-on: ubuntu-latest
|
57
|
-
timeout-minutes:
|
57
|
+
timeout-minutes: 120
|
58
58
|
steps:
|
59
59
|
- name: Checkout
|
60
60
|
uses: actions/checkout@v5
|
@@ -65,7 +65,7 @@ jobs:
|
|
65
65
|
enable-cache: true
|
66
66
|
|
67
67
|
- name: Install Python
|
68
|
-
uses: actions/setup-python@
|
68
|
+
uses: actions/setup-python@v6
|
69
69
|
id: setup-python
|
70
70
|
with:
|
71
71
|
python-version: "3.13"
|
@@ -88,7 +88,7 @@ jobs:
|
|
88
88
|
max_attempts: 3
|
89
89
|
retry_wait_seconds: 30
|
90
90
|
command: |
|
91
|
-
uv sync --all-
|
91
|
+
uv sync --all-extras --dev
|
92
92
|
shell: bash
|
93
93
|
|
94
94
|
- name: Install System Dependencies
|
@@ -115,7 +115,7 @@ jobs:
|
|
115
115
|
shell: bash
|
116
116
|
|
117
117
|
- name: Upload Coverage to DeepSource
|
118
|
-
if: always()
|
118
|
+
if: always()
|
119
119
|
env:
|
120
120
|
DEEPSOURCE_DSN: ${{ secrets.DEEPSOURCE_DSN }}
|
121
121
|
run: |
|
@@ -134,15 +134,178 @@ jobs:
|
|
134
134
|
.coverage
|
135
135
|
retention-days: 7
|
136
136
|
|
137
|
-
test:
|
138
|
-
needs:
|
137
|
+
test-pr:
|
138
|
+
needs: validate
|
139
|
+
if: github.event_name == 'pull_request' && needs.validate.result == 'success'
|
140
|
+
runs-on: ubuntu-latest
|
141
|
+
strategy:
|
142
|
+
fail-fast: false
|
143
|
+
matrix:
|
144
|
+
test-category:
|
145
|
+
- name: "core"
|
146
|
+
path: "tests/core,tests/utils"
|
147
|
+
system-deps: false
|
148
|
+
timeout: 15
|
149
|
+
- name: "extractors"
|
150
|
+
path: "tests/extractors"
|
151
|
+
system-deps: true
|
152
|
+
timeout: 20
|
153
|
+
- name: "integration"
|
154
|
+
path: "tests/integration,tests/api"
|
155
|
+
system-deps: true
|
156
|
+
timeout: 25
|
157
|
+
- name: "features"
|
158
|
+
path: "tests/features,tests/interfaces,tests/mcp,tests/multiprocessing,tests/ocr"
|
159
|
+
system-deps: true
|
160
|
+
timeout: 20
|
161
|
+
timeout-minutes: ${{ matrix.test-category.timeout }}
|
162
|
+
steps:
|
163
|
+
- name: Checkout
|
164
|
+
uses: actions/checkout@v5
|
165
|
+
|
166
|
+
- name: Install uv
|
167
|
+
uses: astral-sh/setup-uv@v6
|
168
|
+
with:
|
169
|
+
enable-cache: true
|
170
|
+
|
171
|
+
- name: Install Python
|
172
|
+
uses: actions/setup-python@v6
|
173
|
+
with:
|
174
|
+
python-version: "3.13"
|
175
|
+
|
176
|
+
- name: Cache Python Dependencies
|
177
|
+
uses: actions/cache@v4
|
178
|
+
with:
|
179
|
+
path: |
|
180
|
+
~/.cache/uv
|
181
|
+
.venv
|
182
|
+
key: python-dependencies-ubuntu-latest-3.13-${{ matrix.test-category.name }}-${{ hashFiles('uv.lock') }}
|
183
|
+
restore-keys: |
|
184
|
+
python-dependencies-ubuntu-latest-3.13-
|
185
|
+
|
186
|
+
- name: Install Dependencies
|
187
|
+
run: uv sync --all-extras --dev
|
188
|
+
|
189
|
+
- name: Install System Dependencies
|
190
|
+
if: matrix.test-category.system-deps
|
191
|
+
run: |
|
192
|
+
sudo apt-get update
|
193
|
+
sudo apt-get install -y tesseract-ocr tesseract-ocr-deu pandoc
|
194
|
+
|
195
|
+
- name: Run Tests - ${{ matrix.test-category.name }}
|
196
|
+
run: uv run pytest $(echo "${{ matrix.test-category.path }}" | tr ',' ' ') -v --reruns 1 --reruns-delay 1 --cov=kreuzberg --cov-append --cov-report=lcov:coverage-${{ matrix.test-category.name }}.lcov
|
197
|
+
|
198
|
+
- name: Upload Coverage Artifacts
|
199
|
+
uses: actions/upload-artifact@v4
|
200
|
+
with:
|
201
|
+
name: coverage-${{ matrix.test-category.name }}-${{ github.sha }}
|
202
|
+
path: coverage-${{ matrix.test-category.name }}.lcov
|
203
|
+
retention-days: 1
|
204
|
+
|
205
|
+
coverage-pr:
|
206
|
+
needs: test-pr
|
207
|
+
if: github.event_name == 'pull_request' && always()
|
208
|
+
runs-on: ubuntu-latest
|
209
|
+
timeout-minutes: 10
|
210
|
+
steps:
|
211
|
+
- name: Checkout
|
212
|
+
uses: actions/checkout@v5
|
213
|
+
|
214
|
+
- name: Download Coverage Artifacts
|
215
|
+
uses: actions/download-artifact@v4
|
216
|
+
with:
|
217
|
+
pattern: coverage-*-${{ github.sha }}
|
218
|
+
merge-multiple: true
|
219
|
+
|
220
|
+
- name: Install uv
|
221
|
+
uses: astral-sh/setup-uv@v6
|
222
|
+
with:
|
223
|
+
enable-cache: true
|
224
|
+
|
225
|
+
- name: Install Python
|
226
|
+
uses: actions/setup-python@v6
|
227
|
+
with:
|
228
|
+
python-version: "3.13"
|
229
|
+
|
230
|
+
- name: Install Dependencies
|
231
|
+
run: uv sync --dev
|
232
|
+
|
233
|
+
- name: Combine Coverage Reports
|
234
|
+
run: |
|
235
|
+
# Install lcov for combining reports
|
236
|
+
sudo apt-get update && sudo apt-get install -y lcov
|
237
|
+
|
238
|
+
# List available coverage files
|
239
|
+
echo "Available coverage files:"
|
240
|
+
find . -name "coverage-*.lcov" -type f || echo "No coverage files found"
|
241
|
+
|
242
|
+
# Combine all lcov files if they exist
|
243
|
+
coverage_files=($(find . -name "coverage-*.lcov" -type f))
|
244
|
+
if [ ${#coverage_files[@]} -gt 0 ]; then
|
245
|
+
echo "Combining ${#coverage_files[@]} coverage files..."
|
246
|
+
if [ ${#coverage_files[@]} -eq 1 ]; then
|
247
|
+
# Only one file, just copy it
|
248
|
+
cp "${coverage_files[0]}" coverage.lcov
|
249
|
+
else
|
250
|
+
# Multiple files, combine them
|
251
|
+
lcov --rc branch_coverage=1 $(printf " -a %s" "${coverage_files[@]}") -o coverage.lcov
|
252
|
+
fi
|
253
|
+
else
|
254
|
+
echo "No coverage files to combine, creating empty coverage.lcov"
|
255
|
+
echo "TN:" > coverage.lcov
|
256
|
+
echo "end_of_record" >> coverage.lcov
|
257
|
+
fi
|
258
|
+
|
259
|
+
- name: Upload Coverage to DeepSource
|
260
|
+
if: always()
|
261
|
+
env:
|
262
|
+
DEEPSOURCE_DSN: ${{ secrets.DEEPSOURCE_DSN }}
|
263
|
+
run: |
|
264
|
+
# Install DeepSource CLI
|
265
|
+
curl -fsSL https://deepsource.io/cli | sh
|
266
|
+
# Upload coverage report
|
267
|
+
./bin/deepsource report --analyzer test-coverage --key python --value-file ./coverage.lcov
|
268
|
+
|
269
|
+
test-full:
|
270
|
+
needs: validate
|
271
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.validate.result == 'success'
|
139
272
|
runs-on: ${{ matrix.os }}
|
140
273
|
strategy:
|
141
274
|
fail-fast: false
|
142
275
|
matrix:
|
143
276
|
os: [ubuntu-latest, windows-latest, macos-latest]
|
144
277
|
python: ["3.10", "3.11", "3.12", "3.13"]
|
145
|
-
|
278
|
+
test-category:
|
279
|
+
- name: "core"
|
280
|
+
path: "tests/core,tests/utils"
|
281
|
+
system-deps: false
|
282
|
+
timeout: 20
|
283
|
+
- name: "extractors"
|
284
|
+
path: "tests/extractors"
|
285
|
+
system-deps: true
|
286
|
+
timeout: 25
|
287
|
+
- name: "integration"
|
288
|
+
path: "tests/integration,tests/api"
|
289
|
+
system-deps: true
|
290
|
+
timeout: 30
|
291
|
+
- name: "features"
|
292
|
+
path: "tests/features,tests/interfaces,tests/mcp,tests/multiprocessing,tests/ocr"
|
293
|
+
system-deps: true
|
294
|
+
timeout: 25
|
295
|
+
exclude:
|
296
|
+
- test-category: {name: "extractors"}
|
297
|
+
python: "3.11"
|
298
|
+
- test-category: {name: "extractors"}
|
299
|
+
python: "3.12"
|
300
|
+
- test-category: {name: "integration"}
|
301
|
+
python: "3.11"
|
302
|
+
- test-category: {name: "integration"}
|
303
|
+
python: "3.12"
|
304
|
+
- test-category: {name: "features"}
|
305
|
+
python: "3.11"
|
306
|
+
- test-category: {name: "features"}
|
307
|
+
python: "3.12"
|
308
|
+
timeout-minutes: ${{ matrix.test-category.timeout }}
|
146
309
|
steps:
|
147
310
|
- name: Checkout
|
148
311
|
uses: actions/checkout@v5
|
@@ -153,7 +316,7 @@ jobs:
|
|
153
316
|
enable-cache: true
|
154
317
|
|
155
318
|
- name: Install Python
|
156
|
-
uses: actions/setup-python@
|
319
|
+
uses: actions/setup-python@v6
|
157
320
|
id: setup-python
|
158
321
|
with:
|
159
322
|
python-version: ${{ matrix.python }}
|
@@ -180,7 +343,7 @@ jobs:
|
|
180
343
|
echo "Removing existing .venv directory on Windows"
|
181
344
|
rm -rf .venv
|
182
345
|
fi
|
183
|
-
uv sync --all-
|
346
|
+
uv sync --all-extras --dev
|
184
347
|
shell: bash
|
185
348
|
|
186
349
|
- name: Cache Test Artifacts
|
@@ -190,7 +353,7 @@ jobs:
|
|
190
353
|
key: pytest-cache-${{ matrix.os }}-${{ matrix.python }}
|
191
354
|
|
192
355
|
- name: Cache and Install Homebrew (macOS)
|
193
|
-
if: runner.os == 'macOS'
|
356
|
+
if: runner.os == 'macOS' && matrix.test-category.system-deps
|
194
357
|
uses: nick-fields/retry@v3
|
195
358
|
with:
|
196
359
|
timeout_minutes: 10
|
@@ -204,7 +367,7 @@ jobs:
|
|
204
367
|
shell: bash
|
205
368
|
|
206
369
|
- name: Cache and Install APT Packages (Linux)
|
207
|
-
if: runner.os == 'Linux'
|
370
|
+
if: runner.os == 'Linux' && matrix.test-category.system-deps
|
208
371
|
uses: nick-fields/retry@v3
|
209
372
|
with:
|
210
373
|
timeout_minutes: 5
|
@@ -216,7 +379,7 @@ jobs:
|
|
216
379
|
shell: bash
|
217
380
|
|
218
381
|
- name: Install System Dependencies (Windows)
|
219
|
-
if: runner.os == 'Windows'
|
382
|
+
if: runner.os == 'Windows' && matrix.test-category.system-deps
|
220
383
|
uses: nick-fields/retry@v3
|
221
384
|
with:
|
222
385
|
timeout_minutes: 10
|
@@ -231,12 +394,12 @@ jobs:
|
|
231
394
|
pandoc --version
|
232
395
|
shell: pwsh
|
233
396
|
|
234
|
-
- name: Run Tests
|
397
|
+
- name: Run Tests - ${{ matrix.test-category.name }}
|
235
398
|
uses: nick-fields/retry@v3
|
236
399
|
with:
|
237
|
-
timeout_minutes:
|
238
|
-
max_attempts:
|
239
|
-
retry_wait_seconds:
|
400
|
+
timeout_minutes: 10
|
401
|
+
max_attempts: 2
|
402
|
+
retry_wait_seconds: 5
|
240
403
|
command: |
|
241
|
-
uv run pytest -
|
404
|
+
uv run pytest $(echo "${{ matrix.test-category.path }}" | tr ',' ' ') -v --reruns 1 --reruns-delay 1
|
242
405
|
shell: bash
|
@@ -7,7 +7,7 @@ on:
|
|
7
7
|
jobs:
|
8
8
|
test-docker-images:
|
9
9
|
runs-on: ubuntu-latest
|
10
|
-
timeout-minutes:
|
10
|
+
timeout-minutes: 360
|
11
11
|
strategy:
|
12
12
|
matrix:
|
13
13
|
image:
|
@@ -25,7 +25,7 @@ jobs:
|
|
25
25
|
enable-cache: true
|
26
26
|
|
27
27
|
- name: Set up Python
|
28
|
-
uses: actions/setup-python@
|
28
|
+
uses: actions/setup-python@v6
|
29
29
|
with:
|
30
30
|
python-version-file: "pyproject.toml"
|
31
31
|
|
@@ -48,7 +48,6 @@ jobs:
|
|
48
48
|
sudo rm -rf /opt/ghc
|
49
49
|
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
50
50
|
sudo rm -rf /usr/local/share/boost
|
51
|
-
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
52
51
|
|
53
52
|
sudo apt-get clean
|
54
53
|
sudo apt-get autoremove -y
|
@@ -28,6 +28,7 @@ jobs:
|
|
28
28
|
needs: test-images
|
29
29
|
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'release' }}
|
30
30
|
runs-on: ubuntu-latest
|
31
|
+
timeout-minutes: 360
|
31
32
|
permissions:
|
32
33
|
contents: read
|
33
34
|
packages: write
|
@@ -62,7 +63,6 @@ jobs:
|
|
62
63
|
sudo rm -rf /usr/local/lib/node_modules
|
63
64
|
sudo rm -rf /opt/microsoft
|
64
65
|
sudo rm -rf /usr/local/.ghcup
|
65
|
-
sudo rm -rf /opt/hostedtoolcache
|
66
66
|
|
67
67
|
# Clean apt
|
68
68
|
sudo apt-get clean
|
@@ -5,11 +5,6 @@ repos:
|
|
5
5
|
- id: commitlint
|
6
6
|
stages: [commit-msg]
|
7
7
|
additional_dependencies: ["@commitlint/config-conventional"]
|
8
|
-
- repo: https://github.com/Goldziher/ai-rulez
|
9
|
-
rev: v1.6.1
|
10
|
-
hooks:
|
11
|
-
- id: ai-rulez-validate
|
12
|
-
- id: ai-rulez-generate
|
13
8
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
14
9
|
rev: v6.0.0
|
15
10
|
hooks:
|
@@ -37,7 +32,7 @@ repos:
|
|
37
32
|
hooks:
|
38
33
|
- id: markdownlint-fix
|
39
34
|
- repo: https://github.com/adamchainz/blacken-docs
|
40
|
-
rev: 1.
|
35
|
+
rev: 1.20.0
|
41
36
|
hooks:
|
42
37
|
- id: blacken-docs
|
43
38
|
args: ["--pyi", "--line-length", "130"]
|
@@ -53,7 +48,7 @@ repos:
|
|
53
48
|
hooks:
|
54
49
|
- id: pyproject-fmt
|
55
50
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
56
|
-
rev: v0.
|
51
|
+
rev: v0.13.0
|
57
52
|
hooks:
|
58
53
|
- id: ruff
|
59
54
|
args: ["--fix", "--unsafe-fixes"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.14.0
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -31,15 +31,15 @@ Requires-Python: >=3.10
|
|
31
31
|
Requires-Dist: anyio>=4.10.0
|
32
32
|
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
|
-
Requires-Dist: html-to-markdown[lxml]>=1.
|
35
|
-
Requires-Dist: mcp>=1.
|
34
|
+
Requires-Dist: html-to-markdown[lxml]>=1.11.0
|
35
|
+
Requires-Dist: mcp>=1.14.0
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
|
-
Requires-Dist: numpy>=
|
37
|
+
Requires-Dist: numpy>=2.0.0
|
38
38
|
Requires-Dist: playa-pdf>=0.7.0
|
39
|
-
Requires-Dist: polars>=1.33.
|
39
|
+
Requires-Dist: polars>=1.33.1
|
40
40
|
Requires-Dist: psutil>=7.0.0
|
41
41
|
Requires-Dist: pypdfium2==4.30.0
|
42
|
-
Requires-Dist: python-calamine>=0.5.
|
42
|
+
Requires-Dist: python-calamine>=0.5.3
|
43
43
|
Requires-Dist: python-pptx>=1.0.2
|
44
44
|
Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
|
45
45
|
Provides-Extra: additional-extensions
|
@@ -55,17 +55,17 @@ Requires-Dist: keybert>=0.9.0; extra == 'all'
|
|
55
55
|
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
|
56
56
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
57
57
|
Requires-Dist: paddleocr>=3.2.0; extra == 'all'
|
58
|
-
Requires-Dist: paddlepaddle>=3.
|
58
|
+
Requires-Dist: paddlepaddle>=3.2.0; extra == 'all'
|
59
59
|
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
|
60
60
|
Requires-Dist: rich>=14.1.0; extra == 'all'
|
61
|
-
Requires-Dist: semantic-text-splitter>=0.
|
61
|
+
Requires-Dist: semantic-text-splitter>=0.28.0; extra == 'all'
|
62
62
|
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
63
63
|
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
64
64
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
|
65
65
|
Provides-Extra: api
|
66
66
|
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
|
67
67
|
Provides-Extra: chunking
|
68
|
-
Requires-Dist: semantic-text-splitter>=0.
|
68
|
+
Requires-Dist: semantic-text-splitter>=0.28.0; extra == 'chunking'
|
69
69
|
Provides-Extra: cli
|
70
70
|
Requires-Dist: click>=8.2.1; extra == 'cli'
|
71
71
|
Requires-Dist: rich>=14.1.0; extra == 'cli'
|
@@ -85,7 +85,7 @@ Provides-Extra: langdetect
|
|
85
85
|
Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
|
86
86
|
Provides-Extra: paddleocr
|
87
87
|
Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
|
88
|
-
Requires-Dist: paddlepaddle>=3.
|
88
|
+
Requires-Dist: paddlepaddle>=3.2.0; extra == 'paddleocr'
|
89
89
|
Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
|
90
90
|
Description-Content-Type: text/markdown
|
91
91
|
|