kreuzberg 3.11.3__tar.gz → 3.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.docker/Dockerfile +5 -1
  2. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/docker-e2e-tests.yml +1 -0
  3. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.gitignore +3 -0
  4. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/PKG-INFO +1 -1
  5. kreuzberg-3.11.4/docker-compose.example.yml +37 -0
  6. kreuzberg-3.11.4/docs/user-guide/docker.md +417 -0
  7. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/cli.py +1 -3
  8. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/pyproject.toml +2 -4
  9. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/e2e/docker_images_test.py +6 -7
  10. kreuzberg-3.11.4/tests/test_source_files/contract.txt +1 -0
  11. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/uv.lock +2 -2
  12. kreuzberg-3.11.3/.task/checksum/docker-build-core +0 -1
  13. kreuzberg-3.11.3/.task/checksum/docker-build-easyocr +0 -1
  14. kreuzberg-3.11.3/.task/checksum/docker-build-gmft +0 -1
  15. kreuzberg-3.11.3/.task/checksum/docker-build-paddle +0 -1
  16. kreuzberg-3.11.3/docs/user-guide/docker.md +0 -389
  17. kreuzberg-3.11.3/tests/e2e/run_docker_tests.sh +0 -371
  18. kreuzberg-3.11.3/tests/e2e/test_report.json +0 -14
  19. kreuzberg-3.11.3/tests/test_source_files/contract.txt +0 -0
  20. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.commitlintrc +0 -0
  21. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.deepsource.toml +0 -0
  22. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.docker/README.md +0 -0
  23. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.dockerignore +0 -0
  24. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/dependabot.yaml +0 -0
  25. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/ci.yaml +0 -0
  26. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/docs.yml +0 -0
  27. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/pr-title.yaml +0 -0
  28. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/publish-docker.yml +0 -0
  29. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.github/workflows/release.yaml +0 -0
  30. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.markdownlint.yaml +0 -0
  31. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/.pre-commit-config.yaml +0 -0
  32. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/LICENSE +0 -0
  33. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/README.md +0 -0
  34. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/Taskfile.yml +0 -0
  35. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/ai-rulez.yaml +0 -0
  36. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/README.md +0 -0
  37. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/benchmark_baseline.py +0 -0
  38. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/end_to_end_benchmark.py +0 -0
  39. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/final_benchmark.py +0 -0
  40. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/pyproject.toml +0 -0
  41. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/baseline_results.json +0 -0
  42. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
  43. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/comprehensive_caching_results.json +0 -0
  44. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/final_benchmark_results.json +0 -0
  45. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/latest.json +0 -0
  46. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/mime_caching_results.json +0 -0
  47. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/msgspec_caching_results.json +0 -0
  48. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/ocr_caching_results.json +0 -0
  49. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/serialization_benchmark_results.json +0 -0
  50. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/statistical_benchmark_results.json +0 -0
  51. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/results/table_caching_results.json +0 -0
  52. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/serialization_benchmark.py +0 -0
  53. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
  54. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
  55. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
  56. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
  57. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
  58. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
  59. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
  60. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/benchmarks/statistical_benchmark.py +0 -0
  61. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/advanced/custom-extractors.md +0 -0
  62. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/advanced/custom-hooks.md +0 -0
  63. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/advanced/error-handling.md +0 -0
  64. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/advanced/index.md +0 -0
  65. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/advanced/performance.md +0 -0
  66. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/exceptions.md +0 -0
  67. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/extraction-functions.md +0 -0
  68. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/extractor-registry.md +0 -0
  69. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/index.md +0 -0
  70. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/ocr-configuration.md +0 -0
  71. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/api-reference/types.md +0 -0
  72. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/assets/favicon.png +0 -0
  73. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/assets/logo.png +0 -0
  74. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/cli.md +0 -0
  75. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/contributing.md +0 -0
  76. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/css/extra.css +0 -0
  77. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/examples/extraction-examples.md +0 -0
  78. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/examples/index.md +0 -0
  79. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/getting-started/index.md +0 -0
  80. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/getting-started/installation.md +0 -0
  81. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/getting-started/quick-start.md +0 -0
  82. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/index.md +0 -0
  83. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/performance-analysis.md +0 -0
  84. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/api-server.md +0 -0
  85. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/basic-usage.md +0 -0
  86. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/chunking.md +0 -0
  87. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/document-classification.md +0 -0
  88. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/extraction-configuration.md +0 -0
  89. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/index.md +0 -0
  90. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/mcp-server.md +0 -0
  91. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/metadata-extraction.md +0 -0
  92. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/ocr-backends.md +0 -0
  93. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/ocr-configuration.md +0 -0
  94. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/docs/user-guide/supported-formats.md +0 -0
  95. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/__init__.py +0 -0
  96. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/__main__.py +0 -0
  97. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_api/__init__.py +0 -0
  98. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_api/main.py +0 -0
  99. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_chunker.py +0 -0
  100. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_config.py +0 -0
  101. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_constants.py +0 -0
  102. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_document_classification.py +0 -0
  103. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_entity_extraction.py +0 -0
  104. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/__init__.py +0 -0
  105. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_base.py +0 -0
  106. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_email.py +0 -0
  107. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_html.py +0 -0
  108. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_image.py +0 -0
  109. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_pandoc.py +0 -0
  110. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_pdf.py +0 -0
  111. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_presentation.py +0 -0
  112. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  113. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_extractors/_structured.py +0 -0
  114. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_gmft.py +0 -0
  115. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_language_detection.py +0 -0
  116. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_mcp/__init__.py +0 -0
  117. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_mcp/server.py +0 -0
  118. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_mime_types.py +0 -0
  119. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_ocr/__init__.py +0 -0
  120. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_ocr/_base.py +0 -0
  121. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_ocr/_easyocr.py +0 -0
  122. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_ocr/_paddleocr.py +0 -0
  123. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_ocr/_tesseract.py +0 -0
  124. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_playa.py +0 -0
  125. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_registry.py +0 -0
  126. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_types.py +0 -0
  127. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/__init__.py +0 -0
  128. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_cache.py +0 -0
  129. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_device.py +0 -0
  130. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_document_cache.py +0 -0
  131. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_errors.py +0 -0
  132. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_pdf_lock.py +0 -0
  133. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_process_pool.py +0 -0
  134. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_quality.py +0 -0
  135. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_serialization.py +0 -0
  136. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_string.py +0 -0
  137. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_sync.py +0 -0
  138. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_table.py +0 -0
  139. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/_utils/_tmp.py +0 -0
  140. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/exceptions.py +0 -0
  141. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/extraction.py +0 -0
  142. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/kreuzberg/py.typed +0 -0
  143. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/mkdocs.yaml +0 -0
  144. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/__init__.py +0 -0
  145. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/api/__init__.py +0 -0
  146. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/api/main_test.py +0 -0
  147. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/chunker_test.py +0 -0
  148. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/cli_command_test.py +0 -0
  149. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/cli_integration_test.py +0 -0
  150. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/cli_test.py +0 -0
  151. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/config_test.py +0 -0
  152. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/conftest.py +0 -0
  153. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/document_classification_test.py +0 -0
  154. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/entity_extraction_test.py +0 -0
  155. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/exceptions_test.py +0 -0
  156. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extraction_batch_test.py +0 -0
  157. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extraction_test.py +0 -0
  158. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/__init__.py +0 -0
  159. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/email_test.py +0 -0
  160. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/html_test.py +0 -0
  161. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/image_test.py +0 -0
  162. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/pandoc_metadata_test.py +0 -0
  163. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/pandoc_test.py +0 -0
  164. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/pdf_test.py +0 -0
  165. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/presentation_test.py +0 -0
  166. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/spreed_sheet_test.py +0 -0
  167. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/extractors/structured_test.py +0 -0
  168. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/gmft_extended_test.py +0 -0
  169. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/gmft_test.py +0 -0
  170. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/hooks_test.py +0 -0
  171. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/language_detection_test.py +0 -0
  172. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/mcp_server_test.py +0 -0
  173. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/mime_types_test.py +0 -0
  174. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/multiprocessing/__init__.py +0 -0
  175. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/multiprocessing/gmft_integration_test.py +0 -0
  176. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/multiprocessing/gmft_isolated_test.py +0 -0
  177. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/multiprocessing/process_manager_test.py +0 -0
  178. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  179. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/__init__.py +0 -0
  180. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/base_test.py +0 -0
  181. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/device_integration_test.py +0 -0
  182. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/easyocr_test.py +0 -0
  183. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/init_test.py +0 -0
  184. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/paddleocr_test.py +0 -0
  185. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/ocr/tesseract_test.py +0 -0
  186. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/playa_helpers_test.py +0 -0
  187. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/playa_test.py +0 -0
  188. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/registry_test.py +0 -0
  189. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/better-ocr-image.jpg +0 -0
  190. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/contract_test.txt +0 -0
  191. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/document.docx +0 -0
  192. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/email/sample-email.eml +0 -0
  193. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  194. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/excel.xlsx +0 -0
  195. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/form_test.txt +0 -0
  196. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/french-text.txt +0 -0
  197. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/german-text.txt +0 -0
  198. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/html.html +0 -0
  199. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/images/test_hello_world.png +0 -0
  200. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/invoice_image.png +0 -0
  201. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/invoice_test.txt +0 -0
  202. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/json/sample-document.json +0 -0
  203. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
  204. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/markdown.md +0 -0
  205. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/non-ascii-text.pdf +0 -0
  206. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/non-searchable.pdf +0 -0
  207. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/ocr-image.jpg +0 -0
  208. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  209. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  210. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  211. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  212. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/receipt_test.txt +0 -0
  213. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/report_test.txt +0 -0
  214. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/sample-contract.pdf +0 -0
  215. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/scanned.pdf +0 -0
  216. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/searchable.pdf +0 -0
  217. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/spanish-text.txt +0 -0
  218. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/test-article.pdf +0 -0
  219. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/test_source_files/yaml/sample-config.yaml +0 -0
  220. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/types_test.py +0 -0
  221. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/__init__.py +0 -0
  222. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/cache_test.py +0 -0
  223. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/device_test.py +0 -0
  224. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/errors_test.py +0 -0
  225. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/pdf_lock_test.py +0 -0
  226. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/process_pool_test.py +0 -0
  227. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/serialization_test.py +0 -0
  228. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/string_test.py +0 -0
  229. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/sync_test.py +0 -0
  230. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/table_test.py +0 -0
  231. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils/tmp_test.py +0 -0
  232. {kreuzberg-3.11.3 → kreuzberg-3.11.4}/tests/utils_errors_test.py +0 -0
@@ -46,9 +46,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
46
46
  COPY --from=builder /app/.venv /app/.venv
47
47
  COPY --from=builder /app/kreuzberg /app/kreuzberg
48
48
 
49
- # Create non-root user
49
+ # Create non-root user and cache directory
50
50
  RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /sbin/nologin appuser && \
51
+ mkdir -p /app/.kreuzberg && \
51
52
  chown -R appuser:appuser /app
52
53
 
54
+ # Set default cache directory to prevent permission issues
55
+ ENV KREUZBERG_CACHE_DIR=/app/.kreuzberg
56
+
53
57
  USER appuser
54
58
  CMD ["litestar", "--app", "kreuzberg._api.main:app", "run", "--host", "0.0.0.0"]
@@ -15,6 +15,7 @@ jobs:
15
15
  - { name: "easyocr", extras: "easyocr" }
16
16
  - { name: "paddle", extras: "paddleocr" }
17
17
  - { name: "gmft", extras: "gmft" }
18
+ - { name: "all", extras: "all" }
18
19
  fail-fast: false
19
20
 
20
21
  steps:
@@ -39,3 +39,6 @@ requirements.txt
39
39
  site/
40
40
  .cache/
41
41
  dist/
42
+ .task/
43
+ tests/e2e/test_report.json
44
+ tests/e2e/logs/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.11.3
3
+ Version: 3.11.4
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -0,0 +1,37 @@
1
+ services:
2
+ kreuzberg:
3
+ # Choose your image variant:
4
+ # - goldziher/kreuzberg:latest (core, 270MB)
5
+ # - goldziher/kreuzberg:latest-paddle (+ PaddleOCR, 878MB)
6
+ # - goldziher/kreuzberg:latest-easyocr (+ EasyOCR, 8.7GB)
7
+ # - goldziher/kreuzberg:latest-gmft (+ table extraction, 8.6GB)
8
+ # - goldziher/kreuzberg:latest-all (all features, 9.6GB - testing only!)
9
+ image: goldziher/kreuzberg:latest
10
+
11
+ ports:
12
+ - "8000:8000"
13
+
14
+ volumes:
15
+ # Mount your configuration file (optional)
16
+ - "./kreuzberg.toml:/app/kreuzberg.toml:ro"
17
+ # Persist cache across restarts (recommended)
18
+ - "kreuzberg-cache:/app/.kreuzberg"
19
+
20
+ environment:
21
+ - PYTHONUNBUFFERED=1
22
+ - KREUZBERG_CACHE_DIR=/app/.kreuzberg
23
+ # Optional: Cache size limits
24
+ # - KREUZBERG_OCR_CACHE_SIZE_MB=500
25
+ # - KREUZBERG_DOCUMENT_CACHE_SIZE_MB=1000
26
+
27
+ restart: unless-stopped
28
+
29
+ healthcheck:
30
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
31
+ interval: 30s
32
+ timeout: 10s
33
+ retries: 3
34
+ start_period: 40s
35
+
36
+ volumes:
37
+ kreuzberg-cache:
@@ -0,0 +1,417 @@
1
+ # Docker
2
+
3
+ Kreuzberg provides official Docker images for easy deployment and containerized usage.
4
+
5
+ ## Image Variants
6
+
7
+ Docker images are available on [Docker Hub](https://hub.docker.com/r/goldziher/kreuzberg):
8
+
9
+ ### Core Image
10
+
11
+ - **Image**: `goldziher/kreuzberg:latest`
12
+ - **Size**: ~270MB
13
+ - **Includes**: Base library + API server + Tesseract OCR
14
+ - **Use Case**: Basic text extraction from documents
15
+ - **Limitations**: No chunking, language detection, entity extraction, or alternative OCR backends
16
+
17
+ ### OCR Backend Variants
18
+
19
+ - **EasyOCR**: `goldziher/kreuzberg:latest-easyocr` (~8.7GB)
20
+
21
+ - Deep learning-based OCR with support for 80+ languages
22
+ - Better accuracy for complex layouts and handwriting
23
+
24
+ - **PaddleOCR**: `goldziher/kreuzberg:latest-paddle` (~878MB)
25
+
26
+ - Lightweight deep learning OCR
27
+ - Good balance between size and accuracy
28
+
29
+ ### Table Extraction
30
+
31
+ - **GMFT**: `goldziher/kreuzberg:latest-gmft` (~8.6GB)
32
+ - Advanced table detection and extraction from PDFs
33
+ - Uses Microsoft's Table Transformer models
34
+
35
+ ### All-in-One (Testing Only)
36
+
37
+ - **Image**: `goldziher/kreuzberg:latest-all`
38
+ - **Size**: ~9.6GB
39
+ - **⚠️ WARNING**: For testing only, NOT for production use
40
+ - **Includes**: All OCR backends and features
41
+ - **Why not production?**: Unnecessarily large, includes conflicting dependencies, slower startup
42
+
43
+ ## Quick Start
44
+
45
+ ### Basic Usage
46
+
47
+ ```bash
48
+ # Pull and run the core image
49
+ docker pull goldziher/kreuzberg:latest
50
+ docker run -p 8000:8000 goldziher/kreuzberg:latest
51
+
52
+ # Extract text from a document
53
+ curl -X POST http://localhost:8000/extract \
54
+ -F "data=@document.pdf"
55
+ ```
56
+
57
+ ### With Cache Volume
58
+
59
+ ```bash
60
+ # Create cache directory
61
+ mkdir -p kreuzberg-cache
62
+
63
+ # Run with persistent cache
64
+ docker run -p 8000:8000 \
65
+ -v "$(pwd)/kreuzberg-cache:/app/.kreuzberg" \
66
+ goldziher/kreuzberg:latest
67
+ ```
68
+
69
+ ## Customizing Docker Images
70
+
71
+ For production, create a custom image with only the features you need:
72
+
73
+ ### Example 1: Core + Chunking Support
74
+
75
+ ```dockerfile
76
+ FROM goldziher/kreuzberg:latest
77
+
78
+ USER root
79
+
80
+ # Install only chunking dependency
81
+ RUN uv pip install --python /app/.venv/bin/python semantic-text-splitter
82
+
83
+ USER appuser
84
+ ```
85
+
86
+ Build and run:
87
+
88
+ ```bash
89
+ # Build the image
90
+ docker build -t kreuzberg-chunking .
91
+
92
+ # Run with external configuration
93
+ docker run -p 8000:8000 \
94
+ -v "$(pwd)/kreuzberg.toml:/app/kreuzberg.toml:ro" \
95
+ -v "$(pwd)/cache:/app/.kreuzberg" \
96
+ kreuzberg-chunking
97
+ ```
98
+
99
+ ### Example 2: Core + Language Detection + Chunking
100
+
101
+ ```dockerfile
102
+ FROM goldziher/kreuzberg:latest
103
+
104
+ USER root
105
+
106
+ # Install specific features
107
+ RUN uv pip install --python /app/.venv/bin/python \
108
+ semantic-text-splitter \
109
+ fast-langdetect
110
+
111
+ USER appuser
112
+ ```
113
+
114
+ Create configuration file `kreuzberg.toml`:
115
+
116
+ ```toml
117
+ chunk_content = true
118
+ auto_detect_language = true
119
+ max_chars = 2000
120
+ max_overlap = 100
121
+ ```
122
+
123
+ Build the image (for example, `docker build -t kreuzberg-multilang .`), then run it with:
124
+
125
+ ```bash
126
+ docker run -p 8000:8000 \
127
+ -v "$(pwd)/kreuzberg.toml:/app/kreuzberg.toml:ro" \
128
+ -v "$(pwd)/cache:/app/.kreuzberg" \
129
+ kreuzberg-multilang
130
+ ```
131
+
132
+ ### Example 3: Core + PaddleOCR (Custom Build)
133
+
134
+ ```dockerfile
135
+ FROM goldziher/kreuzberg:latest
136
+
137
+ USER root
138
+
139
+ # Install PaddleOCR dependencies
140
+ RUN uv pip install --python /app/.venv/bin/python \
141
+ paddleocr \
142
+ paddlepaddle
143
+
144
+ USER appuser
145
+ ```
146
+
147
+ Build the image (for example, `docker build -t kreuzberg-paddle .`), then run it with the PaddleOCR backend:
148
+
149
+ ```bash
150
+ docker run -p 8000:8000 \
151
+ -e KREUZBERG_OCR_BACKEND=paddleocr \
152
+ -v "$(pwd)/cache:/app/.kreuzberg" \
153
+ kreuzberg-paddle
154
+ ```
155
+
156
+ ### Example 4: Optimized Production Build
157
+
158
+ ```dockerfile
159
+ FROM goldziher/kreuzberg:latest
160
+
161
+ USER root
162
+
163
+ # Install only the features you need
164
+ RUN uv pip install --python /app/.venv/bin/python \
165
+ semantic-text-splitter \
166
+ fast-langdetect && \
167
+ # Clean up cache to reduce image size
168
+ rm -rf /root/.cache/uv
169
+
170
+ USER appuser
171
+
172
+ # Set production environment variables
173
+ ENV PYTHONUNBUFFERED=1
174
+ ENV PYTHONDONTWRITEBYTECODE=1
175
+ ```
176
+
177
+ Deploy with Docker Compose:
178
+
179
+ ```yaml
180
+ services:
181
+ kreuzberg:
182
+ build: .
183
+ ports:
184
+ - "8000:8000"
185
+ volumes:
186
+ - "./config/kreuzberg.toml:/app/kreuzberg.toml:ro"
187
+ - "kreuzberg-cache:/app/.kreuzberg"
188
+ environment:
189
+ - PYTHONUNBUFFERED=1
190
+ - KREUZBERG_CACHE_DIR=/app/.kreuzberg
191
+ restart: unless-stopped
192
+ healthcheck:
193
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
194
+ interval: 30s
195
+ timeout: 10s
196
+ retries: 3
197
+
198
+ volumes:
199
+ kreuzberg-cache:
200
+ ```
201
+
202
+ ## Docker Compose
203
+
204
+ ### Production Setup
205
+
206
+ ```yaml
207
+ services:
208
+ kreuzberg:
209
+ image: goldziher/kreuzberg:latest # Or your custom image
210
+ ports:
211
+ - "8000:8000"
212
+ volumes:
213
+ - "./kreuzberg-cache:/app/.kreuzberg" # Persistent cache
214
+ - "./kreuzberg.toml:/app/kreuzberg.toml:ro" # Configuration
215
+ environment:
216
+ - PYTHONUNBUFFERED=1
217
+ - KREUZBERG_CACHE_DIR=/app/.kreuzberg
218
+ # Cache configuration
219
+ - KREUZBERG_OCR_CACHE_SIZE_MB=500
220
+ - KREUZBERG_DOCUMENT_CACHE_SIZE_MB=1000
221
+ restart: unless-stopped
222
+ healthcheck:
223
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
224
+ interval: 30s
225
+ timeout: 10s
226
+ retries: 3
227
+ ```
228
+
229
+ ## Configuration
230
+
231
+ ### Using Configuration Files
232
+
233
+ Create `kreuzberg.toml`:
234
+
235
+ ```toml
236
+ force_ocr = false
237
+ chunk_content = true # Requires semantic-text-splitter
238
+ extract_tables = false # Requires gmft
239
+ ocr_backend = "tesseract"
240
+
241
+ [tesseract]
242
+ language = "eng"
243
+ psm = 6
244
+ ```
245
+
246
+ Mount the configuration:
247
+
248
+ ```bash
249
+ docker run -p 8000:8000 \
250
+ -v "$(pwd)/kreuzberg.toml:/app/kreuzberg.toml" \
251
+ goldziher/kreuzberg:latest
252
+ ```
253
+
254
+ ### Environment Variables
255
+
256
+ **Cache Configuration:**
257
+
258
+ - `KREUZBERG_CACHE_DIR`: Cache directory (default: `/app/.kreuzberg`)
259
+ - `KREUZBERG_OCR_CACHE_SIZE_MB`: OCR cache size limit (default: `500`)
260
+ - `KREUZBERG_DOCUMENT_CACHE_SIZE_MB`: Document cache size limit (default: `1000`)
261
+
262
+ **Runtime Configuration:**
263
+
264
+ - `PYTHONUNBUFFERED=1`: Ensures proper logging output
265
+ - `PYTHONDONTWRITEBYTECODE=1`: Prevents .pyc file creation
266
+
267
+ ## Production Deployment
268
+
269
+ ### Kubernetes
270
+
271
+ ```yaml
272
+ apiVersion: apps/v1
273
+ kind: Deployment
274
+ metadata:
275
+ name: kreuzberg-api
276
+ spec:
277
+ replicas: 3
278
+ selector:
279
+ matchLabels:
280
+ app: kreuzberg-api
281
+ template:
282
+ metadata:
283
+ labels:
284
+ app: kreuzberg-api
285
+ spec:
286
+ containers:
287
+ - name: kreuzberg
288
+ image: your-registry/kreuzberg-custom:latest
289
+ ports:
290
+ - containerPort: 8000
291
+ volumeMounts:
292
+ - name: cache
293
+ mountPath: /app/.kreuzberg
294
+ - name: config
295
+ mountPath: /app/kreuzberg.toml
296
+ subPath: kreuzberg.toml
297
+ livenessProbe:
298
+ httpGet:
299
+ path: /health
300
+ port: 8000
301
+ initialDelaySeconds: 30
302
+ readinessProbe:
303
+ httpGet:
304
+ path: /health
305
+ port: 8000
306
+ initialDelaySeconds: 5
307
+ resources:
308
+ requests:
309
+ memory: "512Mi"
310
+ cpu: "500m"
311
+ limits:
312
+ memory: "2Gi"
313
+ cpu: "2000m"
314
+ volumes:
315
+ - name: cache
316
+ emptyDir: {}
317
+ - name: config
318
+ configMap:
319
+ name: kreuzberg-config
320
+ ---
321
+ apiVersion: v1
322
+ kind: ConfigMap
323
+ metadata:
324
+ name: kreuzberg-config
325
+ data:
326
+ kreuzberg.toml: |
327
+ chunk_content = false
328
+ ocr_backend = "tesseract"
329
+ [tesseract]
330
+ language = "eng"
331
+ ```
332
+
333
+ ### With nginx Reverse Proxy
334
+
335
+ ```nginx
336
+ server {
337
+ listen 80;
338
+ server_name api.example.com;
339
+
340
+ location / {
341
+ proxy_pass http://localhost:8000;
342
+ proxy_set_header Host $host;
343
+ proxy_set_header X-Real-IP $remote_addr;
344
+
345
+ # File upload settings
346
+ client_max_body_size 100M;
347
+ proxy_read_timeout 300s;
348
+ }
349
+
350
+ location /health {
351
+ proxy_pass http://localhost:8000/health;
352
+ access_log off;
353
+ }
354
+ }
355
+ ```
356
+
357
+ ## Resource Requirements
358
+
359
+ | Variant | CPU | Memory | Storage |
360
+ | ----------- | -------- | ------ | ------- |
361
+ | Core | 1+ cores | 512MB+ | 1GB |
362
+ | + Chunking | 1+ cores | 1GB+ | 1GB |
363
+ | + PaddleOCR | 2+ cores | 2GB+ | 2GB |
364
+ | + EasyOCR | 2+ cores | 4GB+ | 10GB |
365
+ | + GMFT | 2+ cores | 4GB+ | 10GB |
366
+
367
+ ## Troubleshooting
368
+
369
+ ### Common Issues
370
+
371
+ #### Permission Denied on Cache Directory
372
+
373
+ ```bash
374
+ # Fix: Ensure proper ownership
375
+ docker run --rm -v "$(pwd)/cache:/app/.kreuzberg" --user root \
376
+ goldziher/kreuzberg:latest \
377
+ chown -R 999:999 /app/.kreuzberg
378
+ ```
379
+
380
+ #### Missing Dependencies Error
381
+
382
+ ```bash
383
+ # Solution: Use appropriate image variant or build custom image
384
+ # For chunking: Install semantic-text-splitter
385
+ # For language detection: Install fast-langdetect
386
+ ```
387
+
388
+ #### Out of Memory
389
+
390
+ - Increase Docker memory allocation
391
+ - Use a smaller OCR engine (Tesseract instead of EasyOCR)
392
+ - Disable unnecessary features
393
+
394
+ ### Debugging
395
+
396
+ ```bash
397
+ # Check logs
398
+ docker logs <container-id>
399
+
400
+ # Shell access
401
+ docker exec -it <container-id> /bin/bash
402
+
403
+ # Test extraction
404
+ docker exec <container-id> python3 -c "
405
+ from kreuzberg import extract_file_sync
406
+ result = extract_file_sync('/path/to/file.pdf')
407
+ print(result.content[:100])
408
+ "
409
+ ```
410
+
411
+ ## Security Considerations
412
+
413
+ - Runs as non-root user (`appuser`) by default
414
+ - No external API calls or cloud dependencies
415
+ - Processes files locally within the container
416
+ - Use read-only mounts where possible (`:ro`)
417
+ - Consider adding authentication for production use
@@ -19,11 +19,9 @@ except ImportError as e: # pragma: no cover
19
19
 
20
20
  from kreuzberg import __version__, extract_bytes_sync, extract_file_sync
21
21
  from kreuzberg._config import build_extraction_config, find_config_file, load_config_from_file
22
+ from kreuzberg._constants import DEFAULT_MAX_CHARACTERS, DEFAULT_MAX_OVERLAP
22
23
  from kreuzberg.exceptions import KreuzbergError, MissingDependencyError
23
24
 
24
- DEFAULT_MAX_CHARACTERS = 4000
25
- DEFAULT_MAX_OVERLAP = 200
26
-
27
25
  if TYPE_CHECKING:
28
26
  from kreuzberg._types import ExtractionConfig, ExtractionResult
29
27
 
@@ -5,7 +5,7 @@ requires = [ "hatchling" ]
5
5
 
6
6
  [project]
7
7
  name = "kreuzberg"
8
- version = "3.11.3"
8
+ version = "3.11.4"
9
9
  description = "Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -202,9 +202,7 @@ lint.per-file-ignores."tests/**/*.*" = [
202
202
  "SLF001",
203
203
  ]
204
204
  lint.per-file-ignores."tests/e2e/*_test.py" = [
205
- "ANN", # Disable type annotations in E2E tests
206
- "PTH123", # Allow open() in E2E tests
207
- "T201", # print statements are needed for test output
205
+ "T201", # print statements are needed for test output
208
206
  ]
209
207
  lint.isort.known-first-party = [ "kreuzberg", "tests" ]
210
208
  lint.mccabe.max-complexity = 15
@@ -31,10 +31,11 @@ DOCKER_IMAGES = {
31
31
  "easyocr": "kreuzberg:easyocr",
32
32
  "paddle": "kreuzberg:paddle",
33
33
  "gmft": "kreuzberg:gmft",
34
+ "all": "kreuzberg:all",
34
35
  }
35
36
 
36
37
  # Images that are optional (may not be built due to space constraints)
37
- OPTIONAL_IMAGES = {"paddle", "gmft"}
38
+ OPTIONAL_IMAGES = {"paddle", "gmft", "all"}
38
39
 
39
40
  # Security configuration
40
41
  SECURITY_CONFIG = {
@@ -111,7 +112,7 @@ def test_cli_help(image_name: str) -> bool:
111
112
  "--help",
112
113
  ]
113
114
  exit_code, stdout, stderr = run_command(cmd)
114
- success = exit_code == 0 and "Extract text from documents" in stdout
115
+ success = exit_code == 0 and "Text extraction from documents" in stdout
115
116
  if not success:
116
117
  pass
117
118
  return success
@@ -536,13 +537,11 @@ def print_summary(all_results: dict[str, dict[str, bool]]) -> bool:
536
537
  success_rate = (total_passed / total_tests * 100) if total_tests > 0 else 0
537
538
  print(f"Success rate: {success_rate:.1f}%")
538
539
 
539
- if success_rate >= 90:
540
+ # ALL tests must pass - no partial success allowed
541
+ if success_rate == 100:
540
542
  print("✅ Test suite PASSED")
541
543
  return True
542
- if success_rate >= 70:
543
- print("⚠️ Test suite PASSED with warnings")
544
- return True
545
- print("❌ Test suite FAILED")
544
+ print("❌ Test suite FAILED - all tests must pass")
546
545
  return False
547
546
 
548
547
 
@@ -0,0 +1 @@
1
+ Test content for contract.txt file
@@ -956,7 +956,7 @@ name = "exceptiongroup"
956
956
  version = "1.3.0"
957
957
  source = { registry = "https://pypi.org/simple" }
958
958
  dependencies = [
959
- { name = "typing-extensions", marker = "python_full_version < '3.13'" },
959
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
960
960
  ]
961
961
  sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
962
962
  wheels = [
@@ -1632,7 +1632,7 @@ wheels = [
1632
1632
 
1633
1633
  [[package]]
1634
1634
  name = "kreuzberg"
1635
- version = "3.11.2"
1635
+ version = "3.11.4"
1636
1636
  source = { editable = "." }
1637
1637
  dependencies = [
1638
1638
  { name = "anyio" },
@@ -1 +0,0 @@
1
- f2683e2fcbf67fa54bee4baa3ead293a
@@ -1 +0,0 @@
1
- f2683e2fcbf67fa54bee4baa3ead293a
@@ -1 +0,0 @@
1
- f2683e2fcbf67fa54bee4baa3ead293a
@@ -1 +0,0 @@
1
- f2683e2fcbf67fa54bee4baa3ead293a