kreuzberg 3.11.0__tar.gz → 3.11.1__tar.gz

This diff shows the differences in content between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (220)
  1. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/workflows/ci.yaml +3 -3
  2. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/workflows/docs.yml +1 -1
  3. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/workflows/pr-title.yaml +1 -1
  4. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/workflows/publish-docker.yml +1 -1
  5. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/workflows/release.yaml +1 -1
  6. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.pre-commit-config.yaml +9 -7
  7. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/PKG-INFO +8 -8
  8. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/examples/extraction-examples.md +4 -4
  9. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/extraction-configuration.md +3 -3
  10. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_ocr/_easyocr.py +8 -1
  11. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_ocr/_paddleocr.py +2 -1
  12. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/pyproject.toml +8 -8
  13. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/uv.lock +963 -929
  14. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.commitlintrc +0 -0
  15. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.deepsource.toml +0 -0
  16. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.docker/Dockerfile +0 -0
  17. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.docker/README.md +0 -0
  18. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.dockerignore +0 -0
  19. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.github/dependabot.yaml +0 -0
  20. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.gitignore +0 -0
  21. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/.markdownlint.yaml +0 -0
  22. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/LICENSE +0 -0
  23. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/README.md +0 -0
  24. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/ai-rulez.yaml +0 -0
  25. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/README.md +0 -0
  26. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/benchmark_baseline.py +0 -0
  27. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/end_to_end_benchmark.py +0 -0
  28. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/final_benchmark.py +0 -0
  29. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/pyproject.toml +0 -0
  30. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/baseline_results.json +0 -0
  31. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
  32. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/comprehensive_caching_results.json +0 -0
  33. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/final_benchmark_results.json +0 -0
  34. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/latest.json +0 -0
  35. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/mime_caching_results.json +0 -0
  36. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/msgspec_caching_results.json +0 -0
  37. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/ocr_caching_results.json +0 -0
  38. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/serialization_benchmark_results.json +0 -0
  39. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/statistical_benchmark_results.json +0 -0
  40. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/results/table_caching_results.json +0 -0
  41. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/serialization_benchmark.py +0 -0
  42. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
  43. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
  44. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
  45. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
  46. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
  47. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
  48. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
  49. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/benchmarks/statistical_benchmark.py +0 -0
  50. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/advanced/custom-extractors.md +0 -0
  51. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/advanced/custom-hooks.md +0 -0
  52. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/advanced/error-handling.md +0 -0
  53. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/advanced/index.md +0 -0
  54. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/advanced/performance.md +0 -0
  55. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/exceptions.md +0 -0
  56. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/extraction-functions.md +0 -0
  57. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/extractor-registry.md +0 -0
  58. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/index.md +0 -0
  59. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/ocr-configuration.md +0 -0
  60. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/api-reference/types.md +0 -0
  61. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/assets/favicon.png +0 -0
  62. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/assets/logo.png +0 -0
  63. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/cli.md +0 -0
  64. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/contributing.md +0 -0
  65. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/css/extra.css +0 -0
  66. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/examples/index.md +0 -0
  67. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/getting-started/index.md +0 -0
  68. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/getting-started/installation.md +0 -0
  69. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/getting-started/quick-start.md +0 -0
  70. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/index.md +0 -0
  71. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/performance-analysis.md +0 -0
  72. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/api-server.md +0 -0
  73. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/basic-usage.md +0 -0
  74. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/chunking.md +0 -0
  75. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/docker.md +0 -0
  76. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/document-classification.md +0 -0
  77. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/index.md +0 -0
  78. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/mcp-server.md +0 -0
  79. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/metadata-extraction.md +0 -0
  80. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/ocr-backends.md +0 -0
  81. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/ocr-configuration.md +0 -0
  82. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/docs/user-guide/supported-formats.md +0 -0
  83. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/__init__.py +0 -0
  84. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/__main__.py +0 -0
  85. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_api/__init__.py +0 -0
  86. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_api/main.py +0 -0
  87. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_chunker.py +0 -0
  88. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_config.py +0 -0
  89. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_constants.py +0 -0
  90. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_document_classification.py +0 -0
  91. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_entity_extraction.py +0 -0
  92. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/__init__.py +0 -0
  93. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_base.py +0 -0
  94. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_email.py +0 -0
  95. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_html.py +0 -0
  96. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_image.py +0 -0
  97. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_pandoc.py +0 -0
  98. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_pdf.py +0 -0
  99. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_presentation.py +0 -0
  100. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  101. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_extractors/_structured.py +0 -0
  102. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_gmft.py +0 -0
  103. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_language_detection.py +0 -0
  104. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_mcp/__init__.py +0 -0
  105. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_mcp/server.py +0 -0
  106. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_mime_types.py +0 -0
  107. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_ocr/__init__.py +0 -0
  108. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_ocr/_base.py +0 -0
  109. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_ocr/_tesseract.py +0 -0
  110. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_playa.py +0 -0
  111. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_registry.py +0 -0
  112. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_types.py +0 -0
  113. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/__init__.py +0 -0
  114. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_cache.py +0 -0
  115. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_device.py +0 -0
  116. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_document_cache.py +0 -0
  117. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_errors.py +0 -0
  118. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_pdf_lock.py +0 -0
  119. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_process_pool.py +0 -0
  120. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_quality.py +0 -0
  121. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_serialization.py +0 -0
  122. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_string.py +0 -0
  123. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_sync.py +0 -0
  124. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_table.py +0 -0
  125. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/_utils/_tmp.py +0 -0
  126. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/cli.py +0 -0
  127. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/exceptions.py +0 -0
  128. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/extraction.py +0 -0
  129. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/kreuzberg/py.typed +0 -0
  130. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/mkdocs.yaml +0 -0
  131. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/__init__.py +0 -0
  132. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/api/__init__.py +0 -0
  133. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/api/main_test.py +0 -0
  134. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/chunker_test.py +0 -0
  135. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/cli_command_test.py +0 -0
  136. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/cli_integration_test.py +0 -0
  137. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/cli_test.py +0 -0
  138. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/config_test.py +0 -0
  139. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/conftest.py +0 -0
  140. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/document_classification_test.py +0 -0
  141. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/entity_extraction_test.py +0 -0
  142. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/exceptions_test.py +0 -0
  143. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extraction_batch_test.py +0 -0
  144. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extraction_test.py +0 -0
  145. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/__init__.py +0 -0
  146. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/email_test.py +0 -0
  147. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/html_test.py +0 -0
  148. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/image_test.py +0 -0
  149. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/pandoc_metadata_test.py +0 -0
  150. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/pandoc_test.py +0 -0
  151. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/pdf_test.py +0 -0
  152. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/presentation_test.py +0 -0
  153. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/spreed_sheet_test.py +0 -0
  154. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/extractors/structured_test.py +0 -0
  155. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/gmft_extended_test.py +0 -0
  156. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/gmft_test.py +0 -0
  157. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/hooks_test.py +0 -0
  158. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/language_detection_test.py +0 -0
  159. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/mcp_server_test.py +0 -0
  160. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/mime_types_test.py +0 -0
  161. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/multiprocessing/__init__.py +0 -0
  162. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/multiprocessing/gmft_integration_test.py +0 -0
  163. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/multiprocessing/gmft_isolated_test.py +0 -0
  164. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/multiprocessing/process_manager_test.py +0 -0
  165. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  166. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/__init__.py +0 -0
  167. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/base_test.py +0 -0
  168. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/device_integration_test.py +0 -0
  169. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/easyocr_test.py +0 -0
  170. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/init_test.py +0 -0
  171. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/paddleocr_test.py +0 -0
  172. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/ocr/tesseract_test.py +0 -0
  173. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/playa_helpers_test.py +0 -0
  174. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/playa_test.py +0 -0
  175. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/registry_test.py +0 -0
  176. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/better-ocr-image.jpg +0 -0
  177. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/contract.txt +0 -0
  178. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/contract_test.txt +0 -0
  179. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/document.docx +0 -0
  180. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/email/sample-email.eml +0 -0
  181. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  182. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/excel.xlsx +0 -0
  183. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/form_test.txt +0 -0
  184. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/french-text.txt +0 -0
  185. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/german-text.txt +0 -0
  186. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/html.html +0 -0
  187. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/images/test_hello_world.png +0 -0
  188. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/invoice_image.png +0 -0
  189. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/invoice_test.txt +0 -0
  190. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/json/sample-document.json +0 -0
  191. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
  192. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/markdown.md +0 -0
  193. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/non-ascii-text.pdf +0 -0
  194. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/non-searchable.pdf +0 -0
  195. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/ocr-image.jpg +0 -0
  196. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  197. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  198. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  199. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  200. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/receipt_test.txt +0 -0
  201. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/report_test.txt +0 -0
  202. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/sample-contract.pdf +0 -0
  203. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/scanned.pdf +0 -0
  204. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/searchable.pdf +0 -0
  205. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/spanish-text.txt +0 -0
  206. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/test-article.pdf +0 -0
  207. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/test_source_files/yaml/sample-config.yaml +0 -0
  208. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/types_test.py +0 -0
  209. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/__init__.py +0 -0
  210. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/cache_test.py +0 -0
  211. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/device_test.py +0 -0
  212. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/errors_test.py +0 -0
  213. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/pdf_lock_test.py +0 -0
  214. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/process_pool_test.py +0 -0
  215. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/serialization_test.py +0 -0
  216. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/string_test.py +0 -0
  217. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/sync_test.py +0 -0
  218. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/table_test.py +0 -0
  219. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils/tmp_test.py +0 -0
  220. {kreuzberg-3.11.0 → kreuzberg-3.11.1}/tests/utils_errors_test.py +0 -0
@@ -15,7 +15,7 @@ jobs:
15
15
  timeout-minutes: 10
16
16
  steps:
17
17
  - name: Checkout
18
- uses: actions/checkout@v4
18
+ uses: actions/checkout@v5
19
19
 
20
20
  - name: Install uv
21
21
  uses: astral-sh/setup-uv@v6
@@ -58,7 +58,7 @@ jobs:
58
58
  timeout-minutes: 20
59
59
  steps:
60
60
  - name: Checkout
61
- uses: actions/checkout@v4
61
+ uses: actions/checkout@v5
62
62
 
63
63
  - name: Install uv
64
64
  uses: astral-sh/setup-uv@v6
@@ -151,7 +151,7 @@ jobs:
151
151
  timeout-minutes: 30
152
152
  steps:
153
153
  - name: Checkout
154
- uses: actions/checkout@v4
154
+ uses: actions/checkout@v5
155
155
 
156
156
  - name: Install uv
157
157
  uses: astral-sh/setup-uv@v6
@@ -24,7 +24,7 @@ jobs:
24
24
  runs-on: ubuntu-latest
25
25
  steps:
26
26
  - name: Checkout repository
27
- uses: actions/checkout@v4
27
+ uses: actions/checkout@v5
28
28
  with:
29
29
  fetch-depth: 0
30
30
 
@@ -15,6 +15,6 @@ jobs:
15
15
  name: Validate PR title
16
16
  runs-on: ubuntu-latest
17
17
  steps:
18
- - uses: amannn/action-semantic-pull-request@v5
18
+ - uses: amannn/action-semantic-pull-request@v6
19
19
  env:
20
20
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -46,7 +46,7 @@ jobs:
46
46
  df -h
47
47
 
48
48
  - name: Checkout repository
49
- uses: actions/checkout@v4
49
+ uses: actions/checkout@v5
50
50
  with:
51
51
  ref: ${{ github.ref }}
52
52
 
@@ -13,7 +13,7 @@ jobs:
13
13
  contents: read
14
14
  steps:
15
15
  - name: Checkout
16
- uses: actions/checkout@v4
16
+ uses: actions/checkout@v5
17
17
 
18
18
  - name: Install uv
19
19
  uses: astral-sh/setup-uv@v6
@@ -5,13 +5,15 @@ repos:
5
5
  - id: commitlint
6
6
  stages: [commit-msg]
7
7
  additional_dependencies: ["@commitlint/config-conventional"]
8
- - repo: https://github.com/Goldziher/ai-rulez
9
- rev: v1.1.4
10
- hooks:
11
- - id: ai-rulez-validate
12
- - id: ai-rulez-generate
8
+ # Temporarily disabled - ai-rulez Go build failing in CI
9
+ # TODO: Re-enable once ai-rulez v1.4.4+ Python migration is stable
10
+ # - repo: https://github.com/Goldziher/ai-rulez
11
+ # rev: v1.4.3
12
+ # hooks:
13
+ # - id: ai-rulez-validate
14
+ # - id: ai-rulez-generate
13
15
  - repo: https://github.com/pre-commit/pre-commit-hooks
14
- rev: v5.0.0
16
+ rev: v6.0.0
15
17
  hooks:
16
18
  - id: name-tests-test
17
19
  args:
@@ -53,7 +55,7 @@ repos:
53
55
  hooks:
54
56
  - id: pyproject-fmt
55
57
  - repo: https://github.com/astral-sh/ruff-pre-commit
56
- rev: v0.12.7
58
+ rev: v0.12.8
57
59
  hooks:
58
60
  - id: ruff
59
61
  args: ["--fix", "--unsafe-fixes"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.11.0
3
+ Version: 3.11.1
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -28,13 +28,13 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
28
  Classifier: Topic :: Text Processing :: General
29
29
  Classifier: Typing :: Typed
30
30
  Requires-Python: >=3.10
31
- Requires-Dist: anyio>=4.9.0
31
+ Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
34
  Requires-Dist: html-to-markdown[lxml]>=1.9.0
35
- Requires-Dist: mcp>=1.12.3
35
+ Requires-Dist: mcp>=1.12.4
36
36
  Requires-Dist: msgspec>=0.18.0
37
- Requires-Dist: playa-pdf>=0.6.4
37
+ Requires-Dist: playa-pdf>=0.7.0
38
38
  Requires-Dist: psutil>=7.0.0
39
39
  Requires-Dist: pypdfium2==4.30.0
40
40
  Requires-Dist: python-calamine>=0.3.2
@@ -50,19 +50,19 @@ Requires-Dist: easyocr>=1.7.2; extra == 'all'
50
50
  Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
51
51
  Requires-Dist: gmft>=0.4.2; extra == 'all'
52
52
  Requires-Dist: keybert>=0.9.0; extra == 'all'
53
- Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all'
53
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
54
54
  Requires-Dist: mailparse>=1.0.15; extra == 'all'
55
55
  Requires-Dist: paddleocr>=3.1.0; extra == 'all'
56
56
  Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
57
57
  Requires-Dist: pandas>=2.3.1; extra == 'all'
58
- Requires-Dist: playa-pdf[crypto]>=0.6.4; extra == 'all'
58
+ Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
59
59
  Requires-Dist: rich>=14.1.0; extra == 'all'
60
60
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
61
61
  Requires-Dist: setuptools>=80.9.0; extra == 'all'
62
62
  Requires-Dist: spacy>=3.8.7; extra == 'all'
63
63
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
64
64
  Provides-Extra: api
65
- Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'api'
65
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
66
66
  Provides-Extra: chunking
67
67
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
68
68
  Provides-Extra: cli
@@ -70,7 +70,7 @@ Requires-Dist: click>=8.2.1; extra == 'cli'
70
70
  Requires-Dist: rich>=14.1.0; extra == 'cli'
71
71
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
72
72
  Provides-Extra: crypto
73
- Requires-Dist: playa-pdf[crypto]>=0.6.4; extra == 'crypto'
73
+ Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
74
74
  Provides-Extra: document-classification
75
75
  Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
76
76
  Requires-Dist: pandas>=2.3.1; extra == 'document-classification'
@@ -132,15 +132,15 @@ async def extract_tables_from_pdf():
132
132
  # Process extracted tables
133
133
  print(f"Found {len(result.tables)} tables")
134
134
  for i, table in enumerate(result.tables):
135
- print(f"Table {i+1} on page {table.page_number}:")
136
- print(table.text) # Markdown formatted table
135
+ print(f"Table {i+1} on page {table['page_number']}:")
136
+ print(table["text"]) # Markdown formatted table
137
137
 
138
138
  # Work with the pandas DataFrame
139
- df = table.df
139
+ df = table["df"]
140
140
  print(f"Table shape: {df.shape}")
141
141
 
142
142
  # The cropped table image is also available
143
- # table.cropped_image.save(f"table_{i+1}.png")
143
+ # table['cropped_image'].save(f"table_{i+1}.png")
144
144
 
145
145
  # With custom GMFT configuration
146
146
  custom_config = ExtractionConfig(
@@ -237,10 +237,10 @@ result = await extract_file("document_with_tables.pdf", config=config)
237
237
 
238
238
  # Access extracted tables
239
239
  for i, table in enumerate(result.tables):
240
- print(f"Table {i+1} on page {table.page_number}:")
241
- print(table.text) # Markdown formatted table text
240
+ print(f"Table {i+1} on page {table['page_number']}:")
241
+ print(table["text"]) # Markdown formatted table text
242
242
  # You can also access the pandas DataFrame directly
243
- df = table.df
243
+ df = table["df"]
244
244
  print(df.shape) # (rows, columns)
245
245
  ```
246
246
 
@@ -4,7 +4,6 @@ import warnings
4
4
  from dataclasses import dataclass
5
5
  from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
6
6
 
7
- import numpy as np
8
7
  from PIL import Image
9
8
 
10
9
  from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
@@ -188,6 +187,9 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
188
187
 
189
188
  kwargs.pop("language", None)
190
189
  kwargs.pop("use_gpu", None)
190
+ kwargs.pop("device", None)
191
+ kwargs.pop("gpu_memory_limit", None)
192
+ kwargs.pop("fallback_to_cpu", None)
191
193
 
192
194
  try:
193
195
  result = await run_sync(
@@ -455,11 +457,16 @@ class EasyOCRBackend(OCRBackend[EasyOCRConfig]):
455
457
  Raises:
456
458
  OCRError: If OCR processing fails.
457
459
  """
460
+ import numpy as np # noqa: PLC0415
461
+
458
462
  self._init_easyocr_sync(**kwargs)
459
463
 
460
464
  beam_width = kwargs.pop("beam_width")
461
465
  kwargs.pop("language", None)
462
466
  kwargs.pop("use_gpu", None)
467
+ kwargs.pop("device", None)
468
+ kwargs.pop("gpu_memory_limit", None)
469
+ kwargs.pop("fallback_to_cpu", None)
463
470
 
464
471
  try:
465
472
  result = self._reader.readtext(
@@ -7,7 +7,6 @@ from importlib.util import find_spec
7
7
  from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
9
9
 
10
- import numpy as np
11
10
  from PIL import Image
12
11
 
13
12
  from kreuzberg._mime_types import PLAIN_TEXT_MIME_TYPE
@@ -380,6 +379,8 @@ class PaddleBackend(OCRBackend[PaddleOCRConfig]):
380
379
  Raises:
381
380
  OCRError: If OCR processing fails.
382
381
  """
382
+ import numpy as np # noqa: PLC0415
383
+
383
384
  self._init_paddle_ocr_sync(**kwargs)
384
385
 
385
386
  if image.mode != "RGB":
@@ -5,7 +5,7 @@ requires = [ "hatchling" ]
5
5
 
6
6
  [project]
7
7
  name = "kreuzberg"
8
- version = "3.11.0"
8
+ version = "3.11.1"
9
9
  description = "Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -57,13 +57,13 @@ classifiers = [
57
57
  ]
58
58
 
59
59
  dependencies = [
60
- "anyio>=4.9.0",
60
+ "anyio>=4.10.0",
61
61
  "chardetng-py>=0.3.5",
62
62
  "exceptiongroup>=1.2.2; python_version<'3.11'",
63
63
  "html-to-markdown[lxml]>=1.9.0",
64
- "mcp>=1.12.3",
64
+ "mcp>=1.12.4",
65
65
  "msgspec>=0.18.0",
66
- "playa-pdf>=0.6.4", # pinned due to breaking changes in 0.5.0
66
+ "playa-pdf>=0.7.0", # pinned due to breaking changes in 0.5.0
67
67
  "psutil>=7.0.0",
68
68
  "pypdfium2==4.30.0", # pinned due to bug in 4.30.1, until v5 is stable
69
69
  "python-calamine>=0.3.2",
@@ -79,7 +79,7 @@ optional-dependencies.all = [
79
79
  "kreuzberg[additional-extensions,api,chunking,cli,crypto,document-classification,easyocr,entity-extraction,gmft,langdetect,paddleocr]",
80
80
  ]
81
81
  optional-dependencies.api = [
82
- "litestar[standard,structlog,opentelemetry]>=2.16.0",
82
+ "litestar[standard,structlog,opentelemetry]>=2.17.0",
83
83
  ]
84
84
  optional-dependencies.chunking = [ "semantic-text-splitter>=0.27.0" ]
85
85
  optional-dependencies.cli = [
@@ -87,7 +87,7 @@ optional-dependencies.cli = [
87
87
  "rich>=14.1.0",
88
88
  "tomli>=2.0.0; python_version<'3.11'",
89
89
  ]
90
- optional-dependencies.crypto = [ "playa-pdf[crypto]>=0.6.4" ]
90
+ optional-dependencies.crypto = [ "playa-pdf[crypto]>=0.7.0" ]
91
91
  optional-dependencies.document-classification = [
92
92
  "deep-translator>=1.11.4",
93
93
  "pandas>=2.3.1",
@@ -111,13 +111,13 @@ scripts.kreuzberg-mcp = "kreuzberg._mcp.server:main"
111
111
  dev = [
112
112
  "covdefaults>=2.3.0",
113
113
  "mypy>=1.16.1",
114
- "pre-commit>=4.2.0",
114
+ "pre-commit>=4.3.0",
115
115
  "pytest>=8.4.1",
116
116
  "pytest-cov>=6.2.1",
117
117
  "pytest-mock>=3.14.0",
118
118
  "pytest-rerunfailures>=15.1",
119
119
  "pytest-timeout>=2.4.0",
120
- "ruff>=0.12.5",
120
+ "ruff>=0.12.8",
121
121
  "trio>=0.30.0",
122
122
  "uv-bump",
123
123
  ]