kreuzberg 3.11.2__tar.gz → 3.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg-3.11.4/.docker/Dockerfile +58 -0
- kreuzberg-3.11.4/.github/workflows/docker-e2e-tests.yml +152 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/workflows/publish-docker.yml +10 -8
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.gitignore +4 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.pre-commit-config.yaml +2 -2
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/PKG-INFO +7 -7
- kreuzberg-3.11.4/Taskfile.yml +160 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/ai-rulez.yaml +327 -11
- kreuzberg-3.11.4/docker-compose.example.yml +37 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/getting-started/installation.md +25 -0
- kreuzberg-3.11.4/docs/user-guide/docker.md +417 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/ocr-backends.md +5 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/cli.py +1 -3
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/pyproject.toml +11 -4
- kreuzberg-3.11.4/tests/e2e/docker_images_test.py +599 -0
- kreuzberg-3.11.4/tests/test_source_files/contract.txt +1 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/uv.lock +529 -1525
- kreuzberg-3.11.2/.docker/Dockerfile +0 -21
- kreuzberg-3.11.2/docs/user-guide/docker.md +0 -389
- kreuzberg-3.11.2/tests/test_source_files/contract.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.commitlintrc +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.deepsource.toml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.docker/README.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.dockerignore +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/dependabot.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/workflows/ci.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/workflows/docs.yml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/workflows/pr-title.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.github/workflows/release.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/.markdownlint.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/LICENSE +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/README.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/README.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/benchmark_baseline.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/end_to_end_benchmark.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/final_benchmark.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/pyproject.toml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/baseline_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/comprehensive_caching_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/final_benchmark_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/latest.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/mime_caching_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/msgspec_caching_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/ocr_caching_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/serialization_benchmark_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/statistical_benchmark_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/results/table_caching_results.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/serialization_benchmark.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/benchmarks/statistical_benchmark.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/advanced/custom-extractors.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/advanced/custom-hooks.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/advanced/error-handling.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/advanced/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/advanced/performance.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/exceptions.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/extraction-functions.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/extractor-registry.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/ocr-configuration.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/api-reference/types.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/assets/favicon.png +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/assets/logo.png +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/cli.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/contributing.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/css/extra.css +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/examples/extraction-examples.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/examples/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/getting-started/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/getting-started/quick-start.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/performance-analysis.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/api-server.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/basic-usage.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/chunking.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/document-classification.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/extraction-configuration.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/index.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/mcp-server.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/metadata-extraction.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/ocr-configuration.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/docs/user-guide/supported-formats.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/__main__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_api/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_api/main.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_chunker.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_config.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_constants.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_document_classification.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_entity_extraction.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_base.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_email.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_html.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_image.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_pandoc.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_pdf.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_presentation.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_spread_sheet.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_extractors/_structured.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_gmft.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_language_detection.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_mcp/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_mcp/server.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_mime_types.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_ocr/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_ocr/_base.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_ocr/_easyocr.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_ocr/_paddleocr.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_ocr/_tesseract.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_playa.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_registry.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_types.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_cache.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_device.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_document_cache.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_errors.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_pdf_lock.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_process_pool.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_quality.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_serialization.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_string.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_sync.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_table.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/_utils/_tmp.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/exceptions.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/extraction.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/kreuzberg/py.typed +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/mkdocs.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/api/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/api/main_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/chunker_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/cli_command_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/cli_integration_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/cli_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/config_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/conftest.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/document_classification_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/entity_extraction_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/exceptions_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extraction_batch_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extraction_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/email_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/html_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/image_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/pandoc_metadata_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/pandoc_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/pdf_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/presentation_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/spreed_sheet_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/extractors/structured_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/gmft_extended_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/gmft_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/hooks_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/language_detection_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/mcp_server_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/mime_types_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/multiprocessing/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/multiprocessing/gmft_integration_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/multiprocessing/gmft_isolated_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/multiprocessing/process_manager_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/multiprocessing/tesseract_pool_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/base_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/device_integration_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/easyocr_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/init_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/paddleocr_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/ocr/tesseract_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/playa_helpers_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/playa_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/registry_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/better-ocr-image.jpg +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/contract_test.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/document.docx +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/email/sample-email.eml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/excel.xlsx +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/form_test.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/french-text.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/german-text.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/html.html +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/images/test_hello_world.png +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/invoice_image.png +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/invoice_test.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/json/sample-document.json +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/markdown.md +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/non-ascii-text.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/non-searchable.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/ocr-image.jpg +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/receipt_test.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/report_test.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/sample-contract.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/scanned.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/searchable.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/spanish-text.txt +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/test-article.pdf +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/test_source_files/yaml/sample-config.yaml +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/types_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/__init__.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/cache_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/device_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/errors_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/pdf_lock_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/process_pool_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/serialization_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/string_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/sync_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/table_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils/tmp_test.py +0 -0
- {kreuzberg-3.11.2 → kreuzberg-3.11.4}/tests/utils_errors_test.py +0 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# Build stage
|
2
|
+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS builder
|
3
|
+
ARG EXTRAS=""
|
4
|
+
WORKDIR /app
|
5
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
6
|
+
ENV PYTHONUNBUFFERED=1
|
7
|
+
ENV UV_LINK_MODE=copy
|
8
|
+
ENV UV_CACHE_DIR=/tmp/uv-cache
|
9
|
+
|
10
|
+
# Copy dependency files
|
11
|
+
COPY pyproject.toml uv.lock README.md ./
|
12
|
+
COPY kreuzberg kreuzberg
|
13
|
+
|
14
|
+
# Install dependencies with optimizations
|
15
|
+
RUN --mount=type=cache,target=/tmp/uv-cache \
|
16
|
+
uv sync --extra api${EXTRAS:+ --extra ${EXTRAS}} --no-editable --no-dev --compile-bytecode && \
|
17
|
+
rm -rf /app/.venv/lib/python*/site-packages/**/__pycache__ && \
|
18
|
+
find /app/.venv -type f -name "*.pyc" -delete && \
|
19
|
+
find /app/.venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true && \
|
20
|
+
find /app/.venv -type d -name "test" -exec rm -rf {} + 2>/dev/null || true
|
21
|
+
|
22
|
+
# Runtime stage
|
23
|
+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS runtime
|
24
|
+
ARG EXTRAS=""
|
25
|
+
WORKDIR /app
|
26
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
27
|
+
ENV PYTHONUNBUFFERED=1
|
28
|
+
ENV PATH="/app/.venv/bin:$PATH"
|
29
|
+
|
30
|
+
# Install runtime dependencies
|
31
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
32
|
+
pandoc \
|
33
|
+
tesseract-ocr \
|
34
|
+
tesseract-ocr-eng \
|
35
|
+
tesseract-ocr-osd \
|
36
|
+
libglib2.0-0 \
|
37
|
+
libsm6 \
|
38
|
+
libxext6 \
|
39
|
+
libxrender-dev \
|
40
|
+
libgomp1 \
|
41
|
+
libgl1 \
|
42
|
+
libglib2.0-0 \
|
43
|
+
&& apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
44
|
+
|
45
|
+
# Copy virtual environment from builder
|
46
|
+
COPY --from=builder /app/.venv /app/.venv
|
47
|
+
COPY --from=builder /app/kreuzberg /app/kreuzberg
|
48
|
+
|
49
|
+
# Create non-root user and cache directory
|
50
|
+
RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /sbin/nologin appuser && \
|
51
|
+
mkdir -p /app/.kreuzberg && \
|
52
|
+
chown -R appuser:appuser /app
|
53
|
+
|
54
|
+
# Set default cache directory to prevent permission issues
|
55
|
+
ENV KREUZBERG_CACHE_DIR=/app/.kreuzberg
|
56
|
+
|
57
|
+
USER appuser
|
58
|
+
CMD ["litestar", "--app", "kreuzberg._api.main:app", "run", "--host", "0.0.0.0"]
|
@@ -0,0 +1,152 @@
|
|
1
|
+
name: Docker E2E Tests
|
2
|
+
|
3
|
+
on:
|
4
|
+
workflow_dispatch:
|
5
|
+
workflow_call:
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
test-docker-images:
|
9
|
+
runs-on: ubuntu-latest
|
10
|
+
timeout-minutes: 60
|
11
|
+
strategy:
|
12
|
+
matrix:
|
13
|
+
image:
|
14
|
+
- { name: "core", extras: "" }
|
15
|
+
- { name: "easyocr", extras: "easyocr" }
|
16
|
+
- { name: "paddle", extras: "paddleocr" }
|
17
|
+
- { name: "gmft", extras: "gmft" }
|
18
|
+
- { name: "all", extras: "all" }
|
19
|
+
fail-fast: false
|
20
|
+
|
21
|
+
steps:
|
22
|
+
- name: Checkout repository
|
23
|
+
uses: actions/checkout@v5
|
24
|
+
|
25
|
+
- name: Install uv
|
26
|
+
uses: astral-sh/setup-uv@v6
|
27
|
+
with:
|
28
|
+
enable-cache: true
|
29
|
+
|
30
|
+
- name: Set up Python
|
31
|
+
uses: actions/setup-python@v5
|
32
|
+
with:
|
33
|
+
python-version-file: "pyproject.toml"
|
34
|
+
|
35
|
+
- name: Install test dependencies
|
36
|
+
run: |
|
37
|
+
uv pip install --system asyncio
|
38
|
+
|
39
|
+
- name: Install system dependencies
|
40
|
+
run: |
|
41
|
+
sudo apt-get update
|
42
|
+
sudo apt-get install -y pandoc tesseract-ocr jq bc
|
43
|
+
|
44
|
+
- name: Free up disk space
|
45
|
+
run: |
|
46
|
+
echo "Initial disk usage:"
|
47
|
+
df -h
|
48
|
+
|
49
|
+
sudo rm -rf /usr/share/dotnet
|
50
|
+
sudo rm -rf /usr/local/lib/android
|
51
|
+
sudo rm -rf /opt/ghc
|
52
|
+
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
53
|
+
sudo rm -rf /usr/local/share/boost
|
54
|
+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
55
|
+
|
56
|
+
sudo apt-get clean
|
57
|
+
sudo apt-get autoremove -y
|
58
|
+
|
59
|
+
docker system prune -af --volumes
|
60
|
+
|
61
|
+
sudo swapoff -a
|
62
|
+
sudo rm -f /swapfile
|
63
|
+
|
64
|
+
echo "Available disk space after cleanup:"
|
65
|
+
df -h
|
66
|
+
|
67
|
+
- name: Set up Docker Buildx
|
68
|
+
uses: docker/setup-buildx-action@v3
|
69
|
+
with:
|
70
|
+
driver-opts: |
|
71
|
+
image=moby/buildkit:latest
|
72
|
+
network=host
|
73
|
+
|
74
|
+
- name: Configure Docker
|
75
|
+
run: |
|
76
|
+
sudo tee /etc/docker/daemon.json > /dev/null <<EOF
|
77
|
+
{
|
78
|
+
"max-concurrent-downloads": 10,
|
79
|
+
"max-concurrent-uploads": 10,
|
80
|
+
"storage-driver": "overlay2"
|
81
|
+
}
|
82
|
+
EOF
|
83
|
+
|
84
|
+
sudo systemctl restart docker
|
85
|
+
docker info
|
86
|
+
|
87
|
+
- name: Build Docker image - ${{ matrix.image.name }}
|
88
|
+
run: |
|
89
|
+
export DOCKER_BUILDKIT=1
|
90
|
+
export BUILDKIT_PROGRESS=plain
|
91
|
+
|
92
|
+
echo "Building ${{ matrix.image.name }} image..."
|
93
|
+
docker build -f .docker/Dockerfile \
|
94
|
+
--build-arg EXTRAS="${{ matrix.image.extras }}" \
|
95
|
+
-t kreuzberg:${{ matrix.image.name }} \
|
96
|
+
--cache-from type=gha \
|
97
|
+
--cache-to type=gha,mode=max \
|
98
|
+
--load \
|
99
|
+
.
|
100
|
+
|
101
|
+
echo "Built image:"
|
102
|
+
docker images --format "table {{.Repository}}:{{.Tag}}\t{{.Size}}" | grep kreuzberg:${{ matrix.image.name }} || true
|
103
|
+
|
104
|
+
- name: Run E2E tests - ${{ matrix.image.name }}
|
105
|
+
run: |
|
106
|
+
mkdir -p tests/e2e/logs
|
107
|
+
echo "Running E2E tests for ${{ matrix.image.name }}..."
|
108
|
+
python3 tests/e2e/docker_images_test.py --image ${{ matrix.image.name }}
|
109
|
+
|
110
|
+
- name: Generate test report - ${{ matrix.image.name }}
|
111
|
+
if: always()
|
112
|
+
run: |
|
113
|
+
if [ -f "tests/e2e/test_report.json" ]; then
|
114
|
+
echo "## Test Report Summary for ${{ matrix.image.name }}" >> $GITHUB_STEP_SUMMARY
|
115
|
+
echo "\`\`\`json" >> $GITHUB_STEP_SUMMARY
|
116
|
+
jq '.["${{ matrix.image.name }}"]' tests/e2e/test_report.json >> $GITHUB_STEP_SUMMARY || echo "No results for ${{ matrix.image.name }}" >> $GITHUB_STEP_SUMMARY
|
117
|
+
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
118
|
+
fi
|
119
|
+
|
120
|
+
- name: Collect Docker logs on failure
|
121
|
+
if: failure()
|
122
|
+
run: |
|
123
|
+
mkdir -p tests/e2e/docker-logs
|
124
|
+
|
125
|
+
for container in $(docker ps -a --filter "name=kreuzberg-test" --format "{{.Names}}"); do
|
126
|
+
docker logs "$container" > "tests/e2e/docker-logs/${container}.log" 2>&1 || true
|
127
|
+
done
|
128
|
+
|
129
|
+
docker info > tests/e2e/docker-logs/docker-info.txt 2>&1 || true
|
130
|
+
docker version > tests/e2e/docker-logs/docker-version.txt 2>&1 || true
|
131
|
+
|
132
|
+
- name: Upload test results - ${{ matrix.image.name }}
|
133
|
+
if: always()
|
134
|
+
uses: actions/upload-artifact@v4
|
135
|
+
with:
|
136
|
+
name: docker-e2e-test-results-${{ matrix.image.name }}
|
137
|
+
path: |
|
138
|
+
tests/e2e/logs/
|
139
|
+
tests/e2e/*.log
|
140
|
+
tests/e2e/*.json
|
141
|
+
tests/e2e/docker-logs/
|
142
|
+
retention-days: 7
|
143
|
+
|
144
|
+
- name: Clean up Docker resources
|
145
|
+
if: always()
|
146
|
+
run: |
|
147
|
+
docker ps -aq --filter "name=kreuzberg-test" | xargs -r docker rm -f || true
|
148
|
+
docker rmi kreuzberg:${{ matrix.image.name }} || true
|
149
|
+
docker system prune -af --volumes || true
|
150
|
+
|
151
|
+
echo "Final disk usage after ${{ matrix.image.name }}:"
|
152
|
+
df -h
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
name: Publish Docker Images
|
3
2
|
|
4
3
|
on:
|
@@ -7,12 +6,20 @@ on:
|
|
7
6
|
types: [published]
|
8
7
|
|
9
8
|
jobs:
|
10
|
-
|
9
|
+
# Run E2E tests first
|
10
|
+
test-images:
|
11
|
+
uses: ./.github/workflows/docker-e2e-tests.yml
|
12
|
+
|
13
|
+
# Build and publish images after tests pass
|
14
|
+
build-and-publish:
|
15
|
+
needs: test-images
|
11
16
|
runs-on: ubuntu-latest
|
12
17
|
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'release' }}
|
13
18
|
permissions:
|
14
19
|
contents: read
|
15
20
|
packages: write
|
21
|
+
outputs:
|
22
|
+
version: ${{ steps.get_version.outputs.VERSION }}
|
16
23
|
|
17
24
|
strategy:
|
18
25
|
max-parallel: 2
|
@@ -37,7 +44,6 @@ jobs:
|
|
37
44
|
steps:
|
38
45
|
- name: Free up disk space
|
39
46
|
run: |
|
40
|
-
# Remove large unnecessary packages to free up space
|
41
47
|
sudo rm -rf /usr/share/dotnet
|
42
48
|
sudo rm -rf /usr/local/lib/android
|
43
49
|
sudo rm -rf /opt/ghc
|
@@ -54,10 +60,8 @@ jobs:
|
|
54
60
|
id: get_version
|
55
61
|
run: |
|
56
62
|
if [ "${{ github.event_name }}" = "release" ]; then
|
57
|
-
# For release events, use the release tag
|
58
63
|
VERSION="${{ github.event.release.tag_name }}"
|
59
64
|
else
|
60
|
-
# For workflow_dispatch, get the latest tag
|
61
65
|
git fetch --tags
|
62
66
|
VERSION=$(git tag --sort=-version:refname | head -n1)
|
63
67
|
fi
|
@@ -82,12 +86,10 @@ jobs:
|
|
82
86
|
with:
|
83
87
|
images: goldziher/kreuzberg
|
84
88
|
tags: |
|
85
|
-
# Release version tag (e.g., v3.0.0-easyocr)
|
86
89
|
type=raw,value=${{ steps.get_version.outputs.VERSION }}${{ matrix.tag_suffix }}
|
87
|
-
# Latest tag for each variant (e.g., latest-easyocr)
|
88
90
|
type=raw,value=latest${{ matrix.tag_suffix }}
|
89
91
|
|
90
|
-
- name: Build and push Docker image
|
92
|
+
- name: Build and push Docker image to Docker Hub
|
91
93
|
uses: docker/build-push-action@v6
|
92
94
|
with:
|
93
95
|
context: .
|
@@ -6,7 +6,7 @@ repos:
|
|
6
6
|
stages: [commit-msg]
|
7
7
|
additional_dependencies: ["@commitlint/config-conventional"]
|
8
8
|
- repo: https://github.com/Goldziher/ai-rulez
|
9
|
-
rev: v1.
|
9
|
+
rev: v1.6.0
|
10
10
|
hooks:
|
11
11
|
- id: ai-rulez-validate
|
12
12
|
- id: ai-rulez-generate
|
@@ -53,7 +53,7 @@ repos:
|
|
53
53
|
hooks:
|
54
54
|
- id: pyproject-fmt
|
55
55
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
56
|
-
rev: v0.12.
|
56
|
+
rev: v0.12.10
|
57
57
|
hooks:
|
58
58
|
- id: ruff
|
59
59
|
args: ["--fix", "--unsafe-fixes"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.11.2
|
3
|
+
Version: 3.11.4
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -52,9 +52,9 @@ Requires-Dist: gmft>=0.4.2; extra == 'all'
|
|
52
52
|
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
53
53
|
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
|
54
54
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
55
|
-
Requires-Dist: paddleocr>=3.
|
56
|
-
Requires-Dist: paddlepaddle>=3.1.
|
57
|
-
Requires-Dist: pandas>=2.3.
|
55
|
+
Requires-Dist: paddleocr>=3.2.0; extra == 'all'
|
56
|
+
Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
|
57
|
+
Requires-Dist: pandas>=2.3.2; extra == 'all'
|
58
58
|
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
|
59
59
|
Requires-Dist: rich>=14.1.0; extra == 'all'
|
60
60
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
@@ -73,7 +73,7 @@ Provides-Extra: crypto
|
|
73
73
|
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
|
74
74
|
Provides-Extra: document-classification
|
75
75
|
Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
|
76
|
-
Requires-Dist: pandas>=2.3.
|
76
|
+
Requires-Dist: pandas>=2.3.2; extra == 'document-classification'
|
77
77
|
Provides-Extra: easyocr
|
78
78
|
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
79
79
|
Provides-Extra: entity-extraction
|
@@ -84,8 +84,8 @@ Requires-Dist: gmft>=0.4.2; extra == 'gmft'
|
|
84
84
|
Provides-Extra: langdetect
|
85
85
|
Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
|
86
86
|
Provides-Extra: paddleocr
|
87
|
-
Requires-Dist: paddleocr>=3.
|
88
|
-
Requires-Dist: paddlepaddle>=3.1.
|
87
|
+
Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
|
88
|
+
Requires-Dist: paddlepaddle>=3.1.1; extra == 'paddleocr'
|
89
89
|
Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
|
90
90
|
Description-Content-Type: text/markdown
|
91
91
|
|
@@ -0,0 +1,160 @@
|
|
1
|
+
version: "3"
|
2
|
+
|
3
|
+
env:
|
4
|
+
DOCKER_BUILDKIT: 1
|
5
|
+
BUILDKIT_PROGRESS: plain
|
6
|
+
|
7
|
+
vars:
|
8
|
+
DOCKER_DIR: .docker
|
9
|
+
DOCKERFILE: "{{.DOCKER_DIR}}/Dockerfile"
|
10
|
+
TEST_DIR: tests/e2e
|
11
|
+
LOG_DIR: "{{.TEST_DIR}}/logs"
|
12
|
+
|
13
|
+
tasks:
|
14
|
+
# Main task for Docker E2E testing
|
15
|
+
docker:e2e:
|
16
|
+
desc: "Build Docker images and run E2E tests"
|
17
|
+
deps:
|
18
|
+
- docker:build
|
19
|
+
cmds:
|
20
|
+
- uv run python {{.TEST_DIR}}/docker_images_test.py
|
21
|
+
|
22
|
+
# Docker build tasks
|
23
|
+
docker:build:
|
24
|
+
desc: "Build all Docker images for testing"
|
25
|
+
deps:
|
26
|
+
- docker:build:core
|
27
|
+
- docker:build:easyocr
|
28
|
+
- docker:build:paddle
|
29
|
+
- docker:build:gmft
|
30
|
+
cmds:
|
31
|
+
- docker images --format "table {{.Repository}}:{{.Tag}}\t{{.Size}}" | grep kreuzberg || true
|
32
|
+
|
33
|
+
docker:build:core:
|
34
|
+
desc: "Build core Docker image"
|
35
|
+
cmds:
|
36
|
+
- docker build -f {{.DOCKERFILE}} --build-arg EXTRAS="" -t kreuzberg:core .
|
37
|
+
sources:
|
38
|
+
- "{{.DOCKERFILE}}"
|
39
|
+
- kreuzberg/**/*.py
|
40
|
+
- pyproject.toml
|
41
|
+
|
42
|
+
docker:build:easyocr:
|
43
|
+
desc: "Build EasyOCR Docker image"
|
44
|
+
cmds:
|
45
|
+
- docker build -f {{.DOCKERFILE}} --build-arg EXTRAS="easyocr" -t kreuzberg:easyocr .
|
46
|
+
sources:
|
47
|
+
- "{{.DOCKERFILE}}"
|
48
|
+
- kreuzberg/**/*.py
|
49
|
+
- pyproject.toml
|
50
|
+
|
51
|
+
docker:build:paddle:
|
52
|
+
desc: "Build PaddleOCR Docker image"
|
53
|
+
cmds:
|
54
|
+
- docker build -f {{.DOCKERFILE}} --build-arg EXTRAS="paddleocr" -t kreuzberg:paddle .
|
55
|
+
sources:
|
56
|
+
- "{{.DOCKERFILE}}"
|
57
|
+
- kreuzberg/**/*.py
|
58
|
+
- pyproject.toml
|
59
|
+
|
60
|
+
docker:build:gmft:
|
61
|
+
desc: "Build GMFT Docker image"
|
62
|
+
cmds:
|
63
|
+
- docker build -f {{.DOCKERFILE}} --build-arg EXTRAS="gmft" -t kreuzberg:gmft .
|
64
|
+
sources:
|
65
|
+
- "{{.DOCKERFILE}}"
|
66
|
+
- kreuzberg/**/*.py
|
67
|
+
- pyproject.toml
|
68
|
+
|
69
|
+
# Test runner variants
|
70
|
+
docker:test:
|
71
|
+
desc: "Run Docker E2E tests (images must be built)"
|
72
|
+
cmds:
|
73
|
+
- uv run python {{.TEST_DIR}}/docker_images_test.py
|
74
|
+
|
75
|
+
# Utility tasks
|
76
|
+
docker:clean:
|
77
|
+
desc: "Clean up Docker test images and containers"
|
78
|
+
cmds:
|
79
|
+
- docker ps -aq --filter "name=kreuzberg-test" | xargs -r docker rm -f 2>/dev/null || true
|
80
|
+
- docker rmi kreuzberg:core kreuzberg:easyocr kreuzberg:paddle kreuzberg:gmft 2>/dev/null || true
|
81
|
+
- docker system prune -f
|
82
|
+
|
83
|
+
docker:logs:
|
84
|
+
desc: "Show logs from test containers"
|
85
|
+
cmds:
|
86
|
+
- docker ps -a --filter "name=kreuzberg-test" --format "table {{.Names}}\t{{.Status}}"
|
87
|
+
- for container in $(docker ps -a --filter "name=kreuzberg-test" --format "{{.Names}}"); do echo "=== $container ==="; docker logs --tail 50 "$container" 2>&1 || true; done
|
88
|
+
|
89
|
+
# Development tasks
|
90
|
+
install:
|
91
|
+
desc: "Install dependencies with uv"
|
92
|
+
cmds:
|
93
|
+
- uv sync
|
94
|
+
|
95
|
+
install:all:
|
96
|
+
desc: "Install all optional dependencies with uv"
|
97
|
+
cmds:
|
98
|
+
- uv sync --all-extras
|
99
|
+
|
100
|
+
test:
|
101
|
+
desc: "Run tests with pytest"
|
102
|
+
cmds:
|
103
|
+
- uv run pytest
|
104
|
+
|
105
|
+
test:cov:
|
106
|
+
desc: "Run tests with coverage"
|
107
|
+
cmds:
|
108
|
+
- uv run pytest --cov
|
109
|
+
|
110
|
+
format:
|
111
|
+
desc: "Format code with ruff"
|
112
|
+
cmds:
|
113
|
+
- uv run ruff format
|
114
|
+
|
115
|
+
lint:
|
116
|
+
desc: "Lint code with ruff"
|
117
|
+
cmds:
|
118
|
+
- uv run ruff check
|
119
|
+
|
120
|
+
lint:fix:
|
121
|
+
desc: "Fix linting issues with ruff"
|
122
|
+
cmds:
|
123
|
+
- uv run ruff check --fix
|
124
|
+
|
125
|
+
typecheck:
|
126
|
+
desc: "Type check with mypy"
|
127
|
+
cmds:
|
128
|
+
- uv run mypy
|
129
|
+
|
130
|
+
# Documentation tasks
|
131
|
+
docs:build:
|
132
|
+
desc: "Build documentation"
|
133
|
+
cmds:
|
134
|
+
- uv run mkdocs build --clean --strict
|
135
|
+
|
136
|
+
docs:serve:
|
137
|
+
desc: "Serve documentation locally"
|
138
|
+
cmds:
|
139
|
+
- uv run mkdocs serve
|
140
|
+
|
141
|
+
# CI/CD tasks
|
142
|
+
ci:
|
143
|
+
desc: "Run full CI pipeline locally"
|
144
|
+
deps:
|
145
|
+
- format
|
146
|
+
- lint
|
147
|
+
- typecheck
|
148
|
+
- test:cov
|
149
|
+
|
150
|
+
ci:docker:
|
151
|
+
desc: "Run Docker CI pipeline"
|
152
|
+
deps:
|
153
|
+
- docker:build
|
154
|
+
- docker:test
|
155
|
+
|
156
|
+
# Default task
|
157
|
+
default:
|
158
|
+
desc: "Show available tasks"
|
159
|
+
cmds:
|
160
|
+
- task --list
|