kreuzberg 3.13.2__tar.gz → 3.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/ci.yaml +183 -20
  2. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/docker-e2e-tests.yml +2 -3
  3. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/docs.yml +1 -1
  4. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/publish-docker.yml +1 -1
  5. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/release.yaml +1 -1
  6. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.gitignore +3 -0
  7. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.pre-commit-config.yaml +2 -7
  8. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/PKG-INFO +10 -10
  9. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/ai-rulez.yaml +236 -176
  10. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/examples/extraction-examples.md +1 -1
  11. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/api-server.md +59 -0
  12. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/extraction-configuration.md +75 -0
  13. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/ocr-configuration.md +65 -1
  14. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_api/main.py +117 -15
  15. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_config.py +3 -0
  16. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_image.py +20 -2
  17. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_pdf.py +21 -1
  18. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_spread_sheet.py +16 -2
  19. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_gmft.py +79 -33
  20. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mcp/server.py +0 -76
  21. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_base.py +1 -2
  22. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_paddleocr.py +39 -13
  23. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_tesseract.py +16 -6
  24. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_registry.py +26 -0
  25. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_types.py +64 -1
  26. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_cache.py +34 -12
  27. kreuzberg-3.14.0/kreuzberg/_utils/_image_preprocessing.py +346 -0
  28. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_ocr_cache.py +2 -5
  29. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_process_pool.py +3 -3
  30. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_table.py +4 -1
  31. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/cli.py +19 -2
  32. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/extraction.py +4 -4
  33. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/pyproject.toml +15 -15
  34. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/main_test.py +31 -0
  35. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/runtime_config_test.py +1 -1
  36. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/conftest.py +99 -0
  37. kreuzberg-3.14.0/tests/core/dpi_configuration_test.py +353 -0
  38. kreuzberg-3.14.0/tests/core/html_to_markdown_config_test.py +0 -0
  39. kreuzberg-3.14.0/tests/core/mime_types_test.py +0 -0
  40. kreuzberg-3.14.0/tests/core/registry_test.py +0 -0
  41. kreuzberg-3.14.0/tests/core/types_test.py +0 -0
  42. kreuzberg-3.14.0/tests/e2e/__init__.py +0 -0
  43. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/e2e/docker_e2e_test.py +4 -4
  44. kreuzberg-3.14.0/tests/extractors/__init__.py +0 -0
  45. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/html_test.py +1 -1
  46. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/image_test.py +7 -3
  47. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pandoc_test.py +1 -0
  48. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pdf_test.py +7 -22
  49. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/presentation_test.py +1 -1
  50. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/spreed_sheet_test.py +4 -0
  51. kreuzberg-3.14.0/tests/features/__init__.py +0 -0
  52. kreuzberg-3.14.0/tests/features/chunker_test.py +0 -0
  53. kreuzberg-3.14.0/tests/features/document_classification_test.py +0 -0
  54. kreuzberg-3.14.0/tests/features/entity_extraction_test.py +0 -0
  55. kreuzberg-3.14.0/tests/features/gmft_test.py +0 -0
  56. kreuzberg-3.14.0/tests/features/hooks_test.py +0 -0
  57. kreuzberg-3.14.0/tests/features/language_detection_test.py +0 -0
  58. kreuzberg-3.14.0/tests/integration/__init__.py +0 -0
  59. kreuzberg-3.14.0/tests/integration/api/__init__.py +0 -0
  60. kreuzberg-3.14.0/tests/integration/api/large_file_test.py +0 -0
  61. kreuzberg-3.14.0/tests/integration/api/mounted_config_test.py +0 -0
  62. kreuzberg-3.14.0/tests/integration/dpi_integration_test.py +244 -0
  63. kreuzberg-3.14.0/tests/integration/multiprocessing/__init__.py +0 -0
  64. kreuzberg-3.14.0/tests/integration/multiprocessing/gmft_integration_test.py +0 -0
  65. kreuzberg-3.14.0/tests/integration/ocr/__init__.py +0 -0
  66. kreuzberg-3.14.0/tests/integration/ocr/device_integration_test.py +0 -0
  67. kreuzberg-3.14.0/tests/integration/ocr/tesseract_sync_formats_test.py +0 -0
  68. kreuzberg-3.14.0/tests/integration/ocr/tesseract_tsv_integration_test.py +0 -0
  69. kreuzberg-3.14.0/tests/integration/regression_test.py +134 -0
  70. kreuzberg-3.14.0/tests/interfaces/__init__.py +0 -0
  71. kreuzberg-3.14.0/tests/mcp/__init__.py +0 -0
  72. kreuzberg-3.14.0/tests/mcp/mcp_server_test.py +0 -0
  73. kreuzberg-3.14.0/tests/multiprocessing/__init__.py +0 -0
  74. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/gmft_isolated_test.py +54 -58
  75. kreuzberg-3.14.0/tests/ocr/__init__.py +0 -0
  76. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/easyocr_test.py +1 -10
  77. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/paddleocr_test.py +7 -18
  78. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/tesseract_test.py +1 -3
  79. kreuzberg-3.14.0/tests/test_source_files/Xerox_AltaLink_series_mfp_sag_en-US 2.pdf +0 -0
  80. kreuzberg-3.14.0/tests/test_source_files/google-doc-document.pdf +0 -0
  81. kreuzberg-3.14.0/tests/test_source_files/sharable-web-guide.pdf +0 -0
  82. kreuzberg-3.14.0/tests/test_source_files/test-excel.xls +0 -0
  83. kreuzberg-3.14.0/tests/utils/__init__.py +0 -0
  84. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/device_test.py +1 -1
  85. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/ocr_cache_test.py +7 -43
  86. kreuzberg-3.14.0/tests/utils/playa_helpers_test.py +0 -0
  87. kreuzberg-3.14.0/tests/utils/playa_test.py +0 -0
  88. kreuzberg-3.14.0/tests/utils/quality_test.py +121 -0
  89. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/serialization_test.py +1 -1
  90. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/table_test.py +26 -26
  91. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/tmp_test.py +1 -1
  92. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/uv.lock +681 -596
  93. kreuzberg-3.13.2/tests/chunker_test.py +0 -102
  94. kreuzberg-3.13.2/tests/cli_command_test.py +0 -481
  95. kreuzberg-3.13.2/tests/cli_integration_test.py +0 -858
  96. kreuzberg-3.13.2/tests/cli_test.py +0 -324
  97. kreuzberg-3.13.2/tests/config_test.py +0 -1540
  98. kreuzberg-3.13.2/tests/document_classification_test.py +0 -837
  99. kreuzberg-3.13.2/tests/entity_extraction_test.py +0 -588
  100. kreuzberg-3.13.2/tests/exceptions_test.py +0 -91
  101. kreuzberg-3.13.2/tests/extraction_batch_test.py +0 -253
  102. kreuzberg-3.13.2/tests/extraction_test.py +0 -752
  103. kreuzberg-3.13.2/tests/gmft_extended_test.py +0 -137
  104. kreuzberg-3.13.2/tests/gmft_test.py +0 -788
  105. kreuzberg-3.13.2/tests/hooks_test.py +0 -205
  106. kreuzberg-3.13.2/tests/html_to_markdown_config_test.py +0 -217
  107. kreuzberg-3.13.2/tests/language_detection_test.py +0 -152
  108. kreuzberg-3.13.2/tests/mcp_server_test.py +0 -757
  109. kreuzberg-3.13.2/tests/mime_types_test.py +0 -195
  110. kreuzberg-3.13.2/tests/multiprocessing/gmft_integration_test.py +0 -98
  111. kreuzberg-3.13.2/tests/ocr/device_integration_test.py +0 -268
  112. kreuzberg-3.13.2/tests/ocr/tesseract_tsv_integration_test.py +0 -273
  113. kreuzberg-3.13.2/tests/playa_helpers_test.py +0 -473
  114. kreuzberg-3.13.2/tests/playa_test.py +0 -111
  115. kreuzberg-3.13.2/tests/registry_test.py +0 -190
  116. kreuzberg-3.13.2/tests/tesseract_sync_formats_test.py +0 -169
  117. kreuzberg-3.13.2/tests/types_test.py +0 -374
  118. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.commitlintrc +0 -0
  119. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.deepsource.toml +0 -0
  120. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.docker/Dockerfile +0 -0
  121. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.docker/README.md +0 -0
  122. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.dockerignore +0 -0
  123. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/dependabot.yaml +0 -0
  124. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/pr-title.yaml +0 -0
  125. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.github/workflows/test-docker-builds.yml +0 -0
  126. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/.markdownlint.yaml +0 -0
  127. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/LICENSE +0 -0
  128. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/README.md +0 -0
  129. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/Taskfile.yml +0 -0
  130. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/README.md +0 -0
  131. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/__init__.py +0 -0
  132. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/pyproject.toml +0 -0
  133. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/__init__.py +0 -0
  134. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/__main__.py +0 -0
  135. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/benchmarks.py +0 -0
  136. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/cli.py +0 -0
  137. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/models.py +0 -0
  138. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/profiler.py +0 -0
  139. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/benchmarks/src/runner.py +0 -0
  140. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-compose.example.yml +0 -0
  141. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-logs/docker-info.txt +0 -0
  142. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docker-logs/docker-version.txt +0 -0
  143. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/custom-extractors.md +0 -0
  144. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/custom-hooks.md +0 -0
  145. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/error-handling.md +0 -0
  146. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/index.md +0 -0
  147. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/advanced/performance.md +0 -0
  148. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/exceptions.md +0 -0
  149. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/extraction-functions.md +0 -0
  150. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/extractor-registry.md +0 -0
  151. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/index.md +0 -0
  152. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/ocr-configuration.md +0 -0
  153. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/api-reference/types.md +0 -0
  154. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/assets/favicon.png +0 -0
  155. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/assets/logo.png +0 -0
  156. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/cli.md +0 -0
  157. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/contributing.md +0 -0
  158. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/css/extra.css +0 -0
  159. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/examples/index.md +0 -0
  160. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/index.md +0 -0
  161. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/installation.md +0 -0
  162. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/getting-started/quick-start.md +0 -0
  163. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/index.md +0 -0
  164. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/basic-usage.md +0 -0
  165. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/chunking.md +0 -0
  166. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/docker.md +0 -0
  167. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/document-classification.md +0 -0
  168. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/index.md +0 -0
  169. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/mcp-server.md +0 -0
  170. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/metadata-extraction.md +0 -0
  171. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/ocr-backends.md +0 -0
  172. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/docs/user-guide/supported-formats.md +0 -0
  173. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/__init__.py +0 -0
  174. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/__main__.py +0 -0
  175. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_api/__init__.py +0 -0
  176. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_chunker.py +0 -0
  177. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_constants.py +0 -0
  178. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_document_classification.py +0 -0
  179. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_entity_extraction.py +0 -0
  180. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/__init__.py +0 -0
  181. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_base.py +0 -0
  182. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_email.py +0 -0
  183. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_html.py +0 -0
  184. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_pandoc.py +0 -0
  185. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_presentation.py +0 -0
  186. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_extractors/_structured.py +0 -0
  187. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_language_detection.py +0 -0
  188. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mcp/__init__.py +0 -0
  189. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_mime_types.py +0 -0
  190. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/__init__.py +0 -0
  191. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_easyocr.py +0 -0
  192. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_ocr/_table_extractor.py +0 -0
  193. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_playa.py +0 -0
  194. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/__init__.py +0 -0
  195. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_device.py +0 -0
  196. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_document_cache.py +0 -0
  197. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_errors.py +0 -0
  198. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_pdf_lock.py +0 -0
  199. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_quality.py +0 -0
  200. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_ref.py +0 -0
  201. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_serialization.py +0 -0
  202. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_string.py +0 -0
  203. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_sync.py +0 -0
  204. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/_utils/_tmp.py +0 -0
  205. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/exceptions.py +0 -0
  206. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/kreuzberg/py.typed +0 -0
  207. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/mkdocs.yaml +0 -0
  208. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/output.txt +0 -0
  209. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/baseline.json +0 -0
  210. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/serialization.json +0 -0
  211. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/results/statistical.json +0 -0
  212. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/test_report.json +0 -0
  213. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/__init__.py +0 -0
  214. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/__init__.py +0 -0
  215. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/api/conftest.py +0 -0
  216. {kreuzberg-3.13.2/tests/e2e → kreuzberg-3.14.0/tests/core}/__init__.py +0 -0
  217. /kreuzberg-3.13.2/tests/extractors/__init__.py → /kreuzberg-3.14.0/tests/core/config_test.py +0 -0
  218. /kreuzberg-3.13.2/tests/multiprocessing/__init__.py → /kreuzberg-3.14.0/tests/core/exceptions_test.py +0 -0
  219. /kreuzberg-3.13.2/tests/ocr/__init__.py → /kreuzberg-3.14.0/tests/core/extraction_batch_test.py +0 -0
  220. /kreuzberg-3.13.2/tests/utils/__init__.py → /kreuzberg-3.14.0/tests/core/extraction_test.py +0 -0
  221. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/email_test.py +0 -0
  222. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/pandoc_metadata_test.py +0 -0
  223. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/extractors/structured_test.py +0 -0
  224. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/process_manager_test.py +0 -0
  225. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  226. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/base_test.py +0 -0
  227. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/init_test.py +0 -0
  228. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/ocr/tesseract_tsv_test.py +0 -0
  229. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/contract.txt +0 -0
  230. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/contract_test.txt +0 -0
  231. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/document.docx +0 -0
  232. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/email/sample-email.eml +0 -0
  233. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  234. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/excel.xlsx +0 -0
  235. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/flower-no-text.jpg +0 -0
  236. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/form_test.txt +0 -0
  237. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/french-text.txt +0 -0
  238. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/german-text.txt +0 -0
  239. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/html.html +0 -0
  240. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/images/test_hello_world.png +0 -0
  241. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/invoice_image.png +0 -0
  242. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/invoice_test.txt +0 -0
  243. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/json/sample-document.json +0 -0
  244. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
  245. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/markdown.md +0 -0
  246. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/non-ascii-text.pdf +0 -0
  247. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/non-searchable.pdf +0 -0
  248. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/ocr-image.jpg +0 -0
  249. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  250. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  251. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  252. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  253. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/receipt_test.txt +0 -0
  254. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/report_test.txt +0 -0
  255. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/sample-contract.pdf +0 -0
  256. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/scanned.pdf +0 -0
  257. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/searchable.pdf +0 -0
  258. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/spanish-text.txt +0 -0
  259. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/borderless_table.png +0 -0
  260. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/complex_document.png +0 -0
  261. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/tables/simple_table.png +0 -0
  262. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/test-article.pdf +0 -0
  263. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/test_source_files/yaml/sample-config.yaml +0 -0
  264. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/cache_test.py +0 -0
  265. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/errors_test.py +0 -0
  266. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/pdf_lock_test.py +0 -0
  267. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/process_pool_test.py +0 -0
  268. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/ref_test.py +0 -0
  269. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/string_test.py +0 -0
  270. {kreuzberg-3.13.2 → kreuzberg-3.14.0}/tests/utils/sync_test.py +0 -0
@@ -7,7 +7,6 @@ on:
7
7
  push:
8
8
  branches:
9
9
  - main
10
- - feat/smart-multiprocessing
11
10
 
12
11
  jobs:
13
12
  validate:
@@ -23,7 +22,7 @@ jobs:
23
22
  enable-cache: true
24
23
 
25
24
  - name: Set up Python
26
- uses: actions/setup-python@v5
25
+ uses: actions/setup-python@v6
27
26
  with:
28
27
  python-version-file: "pyproject.toml"
29
28
 
@@ -38,7 +37,7 @@ jobs:
38
37
  echo "Removing existing .venv directory on Windows"
39
38
  rm -rf .venv
40
39
  fi
41
- uv sync --all-packages --all-extras --dev
40
+ uv sync --all-extras --dev
42
41
  shell: bash
43
42
 
44
43
  - name: Load Cached Pre-Commit Dependencies
@@ -53,8 +52,9 @@ jobs:
53
52
 
54
53
  coverage:
55
54
  needs: validate
55
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
56
56
  runs-on: ubuntu-latest
57
- timeout-minutes: 20
57
+ timeout-minutes: 120
58
58
  steps:
59
59
  - name: Checkout
60
60
  uses: actions/checkout@v5
@@ -65,7 +65,7 @@ jobs:
65
65
  enable-cache: true
66
66
 
67
67
  - name: Install Python
68
- uses: actions/setup-python@v5
68
+ uses: actions/setup-python@v6
69
69
  id: setup-python
70
70
  with:
71
71
  python-version: "3.13"
@@ -88,7 +88,7 @@ jobs:
88
88
  max_attempts: 3
89
89
  retry_wait_seconds: 30
90
90
  command: |
91
- uv sync --all-packages --all-extras --dev
91
+ uv sync --all-extras --dev
92
92
  shell: bash
93
93
 
94
94
  - name: Install System Dependencies
@@ -115,7 +115,7 @@ jobs:
115
115
  shell: bash
116
116
 
117
117
  - name: Upload Coverage to DeepSource
118
- if: always() && github.event_name == 'push'
118
+ if: always()
119
119
  env:
120
120
  DEEPSOURCE_DSN: ${{ secrets.DEEPSOURCE_DSN }}
121
121
  run: |
@@ -134,15 +134,178 @@ jobs:
134
134
  .coverage
135
135
  retention-days: 7
136
136
 
137
- test:
138
- needs: coverage
137
+ test-pr:
138
+ needs: validate
139
+ if: github.event_name == 'pull_request' && needs.validate.result == 'success'
140
+ runs-on: ubuntu-latest
141
+ strategy:
142
+ fail-fast: false
143
+ matrix:
144
+ test-category:
145
+ - name: "core"
146
+ path: "tests/core,tests/utils"
147
+ system-deps: false
148
+ timeout: 15
149
+ - name: "extractors"
150
+ path: "tests/extractors"
151
+ system-deps: true
152
+ timeout: 20
153
+ - name: "integration"
154
+ path: "tests/integration,tests/api"
155
+ system-deps: true
156
+ timeout: 25
157
+ - name: "features"
158
+ path: "tests/features,tests/interfaces,tests/mcp,tests/multiprocessing,tests/ocr"
159
+ system-deps: true
160
+ timeout: 20
161
+ timeout-minutes: ${{ matrix.test-category.timeout }}
162
+ steps:
163
+ - name: Checkout
164
+ uses: actions/checkout@v5
165
+
166
+ - name: Install uv
167
+ uses: astral-sh/setup-uv@v6
168
+ with:
169
+ enable-cache: true
170
+
171
+ - name: Install Python
172
+ uses: actions/setup-python@v6
173
+ with:
174
+ python-version: "3.13"
175
+
176
+ - name: Cache Python Dependencies
177
+ uses: actions/cache@v4
178
+ with:
179
+ path: |
180
+ ~/.cache/uv
181
+ .venv
182
+ key: python-dependencies-ubuntu-latest-3.13-${{ matrix.test-category.name }}-${{ hashFiles('uv.lock') }}
183
+ restore-keys: |
184
+ python-dependencies-ubuntu-latest-3.13-
185
+
186
+ - name: Install Dependencies
187
+ run: uv sync --all-extras --dev
188
+
189
+ - name: Install System Dependencies
190
+ if: matrix.test-category.system-deps
191
+ run: |
192
+ sudo apt-get update
193
+ sudo apt-get install -y tesseract-ocr tesseract-ocr-deu pandoc
194
+
195
+ - name: Run Tests - ${{ matrix.test-category.name }}
196
+ run: uv run pytest $(echo "${{ matrix.test-category.path }}" | tr ',' ' ') -v --reruns 1 --reruns-delay 1 --cov=kreuzberg --cov-append --cov-report=lcov:coverage-${{ matrix.test-category.name }}.lcov
197
+
198
+ - name: Upload Coverage Artifacts
199
+ uses: actions/upload-artifact@v4
200
+ with:
201
+ name: coverage-${{ matrix.test-category.name }}-${{ github.sha }}
202
+ path: coverage-${{ matrix.test-category.name }}.lcov
203
+ retention-days: 1
204
+
205
+ coverage-pr:
206
+ needs: test-pr
207
+ if: github.event_name == 'pull_request' && always()
208
+ runs-on: ubuntu-latest
209
+ timeout-minutes: 10
210
+ steps:
211
+ - name: Checkout
212
+ uses: actions/checkout@v5
213
+
214
+ - name: Download Coverage Artifacts
215
+ uses: actions/download-artifact@v4
216
+ with:
217
+ pattern: coverage-*-${{ github.sha }}
218
+ merge-multiple: true
219
+
220
+ - name: Install uv
221
+ uses: astral-sh/setup-uv@v6
222
+ with:
223
+ enable-cache: true
224
+
225
+ - name: Install Python
226
+ uses: actions/setup-python@v6
227
+ with:
228
+ python-version: "3.13"
229
+
230
+ - name: Install Dependencies
231
+ run: uv sync --dev
232
+
233
+ - name: Combine Coverage Reports
234
+ run: |
235
+ # Install lcov for combining reports
236
+ sudo apt-get update && sudo apt-get install -y lcov
237
+
238
+ # List available coverage files
239
+ echo "Available coverage files:"
240
+ find . -name "coverage-*.lcov" -type f || echo "No coverage files found"
241
+
242
+ # Combine all lcov files if they exist
243
+ coverage_files=($(find . -name "coverage-*.lcov" -type f))
244
+ if [ ${#coverage_files[@]} -gt 0 ]; then
245
+ echo "Combining ${#coverage_files[@]} coverage files..."
246
+ if [ ${#coverage_files[@]} -eq 1 ]; then
247
+ # Only one file, just copy it
248
+ cp "${coverage_files[0]}" coverage.lcov
249
+ else
250
+ # Multiple files, combine them
251
+ lcov --rc branch_coverage=1 $(printf " -a %s" "${coverage_files[@]}") -o coverage.lcov
252
+ fi
253
+ else
254
+ echo "No coverage files to combine, creating empty coverage.lcov"
255
+ echo "TN:" > coverage.lcov
256
+ echo "end_of_record" >> coverage.lcov
257
+ fi
258
+
259
+ - name: Upload Coverage to DeepSource
260
+ if: always()
261
+ env:
262
+ DEEPSOURCE_DSN: ${{ secrets.DEEPSOURCE_DSN }}
263
+ run: |
264
+ # Install DeepSource CLI
265
+ curl -fsSL https://deepsource.io/cli | sh
266
+ # Upload coverage report
267
+ ./bin/deepsource report --analyzer test-coverage --key python --value-file ./coverage.lcov
268
+
269
+ test-full:
270
+ needs: validate
271
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.validate.result == 'success'
139
272
  runs-on: ${{ matrix.os }}
140
273
  strategy:
141
274
  fail-fast: false
142
275
  matrix:
143
276
  os: [ubuntu-latest, windows-latest, macos-latest]
144
277
  python: ["3.10", "3.11", "3.12", "3.13"]
145
- timeout-minutes: 30
278
+ test-category:
279
+ - name: "core"
280
+ path: "tests/core,tests/utils"
281
+ system-deps: false
282
+ timeout: 20
283
+ - name: "extractors"
284
+ path: "tests/extractors"
285
+ system-deps: true
286
+ timeout: 25
287
+ - name: "integration"
288
+ path: "tests/integration,tests/api"
289
+ system-deps: true
290
+ timeout: 30
291
+ - name: "features"
292
+ path: "tests/features,tests/interfaces,tests/mcp,tests/multiprocessing,tests/ocr"
293
+ system-deps: true
294
+ timeout: 25
295
+ exclude:
296
+ - test-category: {name: "extractors"}
297
+ python: "3.11"
298
+ - test-category: {name: "extractors"}
299
+ python: "3.12"
300
+ - test-category: {name: "integration"}
301
+ python: "3.11"
302
+ - test-category: {name: "integration"}
303
+ python: "3.12"
304
+ - test-category: {name: "features"}
305
+ python: "3.11"
306
+ - test-category: {name: "features"}
307
+ python: "3.12"
308
+ timeout-minutes: ${{ matrix.test-category.timeout }}
146
309
  steps:
147
310
  - name: Checkout
148
311
  uses: actions/checkout@v5
@@ -153,7 +316,7 @@ jobs:
153
316
  enable-cache: true
154
317
 
155
318
  - name: Install Python
156
- uses: actions/setup-python@v5
319
+ uses: actions/setup-python@v6
157
320
  id: setup-python
158
321
  with:
159
322
  python-version: ${{ matrix.python }}
@@ -180,7 +343,7 @@ jobs:
180
343
  echo "Removing existing .venv directory on Windows"
181
344
  rm -rf .venv
182
345
  fi
183
- uv sync --all-packages --all-extras --dev
346
+ uv sync --all-extras --dev
184
347
  shell: bash
185
348
 
186
349
  - name: Cache Test Artifacts
@@ -190,7 +353,7 @@ jobs:
190
353
  key: pytest-cache-${{ matrix.os }}-${{ matrix.python }}
191
354
 
192
355
  - name: Cache and Install Homebrew (macOS)
193
- if: runner.os == 'macOS'
356
+ if: runner.os == 'macOS' && matrix.test-category.system-deps
194
357
  uses: nick-fields/retry@v3
195
358
  with:
196
359
  timeout_minutes: 10
@@ -204,7 +367,7 @@ jobs:
204
367
  shell: bash
205
368
 
206
369
  - name: Cache and Install APT Packages (Linux)
207
- if: runner.os == 'Linux'
370
+ if: runner.os == 'Linux' && matrix.test-category.system-deps
208
371
  uses: nick-fields/retry@v3
209
372
  with:
210
373
  timeout_minutes: 5
@@ -216,7 +379,7 @@ jobs:
216
379
  shell: bash
217
380
 
218
381
  - name: Install System Dependencies (Windows)
219
- if: runner.os == 'Windows'
382
+ if: runner.os == 'Windows' && matrix.test-category.system-deps
220
383
  uses: nick-fields/retry@v3
221
384
  with:
222
385
  timeout_minutes: 10
@@ -231,12 +394,12 @@ jobs:
231
394
  pandoc --version
232
395
  shell: pwsh
233
396
 
234
- - name: Run Tests (without coverage)
397
+ - name: Run Tests - ${{ matrix.test-category.name }}
235
398
  uses: nick-fields/retry@v3
236
399
  with:
237
- timeout_minutes: 15
238
- max_attempts: 3
239
- retry_wait_seconds: 10
400
+ timeout_minutes: 10
401
+ max_attempts: 2
402
+ retry_wait_seconds: 5
240
403
  command: |
241
- uv run pytest -s -vvv --reruns 2 --reruns-delay 1
404
+ uv run pytest $(echo "${{ matrix.test-category.path }}" | tr ',' ' ') -v --reruns 1 --reruns-delay 1
242
405
  shell: bash
@@ -7,7 +7,7 @@ on:
7
7
  jobs:
8
8
  test-docker-images:
9
9
  runs-on: ubuntu-latest
10
- timeout-minutes: 60
10
+ timeout-minutes: 360
11
11
  strategy:
12
12
  matrix:
13
13
  image:
@@ -25,7 +25,7 @@ jobs:
25
25
  enable-cache: true
26
26
 
27
27
  - name: Set up Python
28
- uses: actions/setup-python@v5
28
+ uses: actions/setup-python@v6
29
29
  with:
30
30
  python-version-file: "pyproject.toml"
31
31
 
@@ -48,7 +48,6 @@ jobs:
48
48
  sudo rm -rf /opt/ghc
49
49
  sudo rm -rf /opt/hostedtoolcache/CodeQL
50
50
  sudo rm -rf /usr/local/share/boost
51
- sudo rm -rf "$AGENT_TOOLSDIRECTORY"
52
51
 
53
52
  sudo apt-get clean
54
53
  sudo apt-get autoremove -y
@@ -29,7 +29,7 @@ jobs:
29
29
  fetch-depth: 0
30
30
 
31
31
  - name: Setup Python
32
- uses: actions/setup-python@v5
32
+ uses: actions/setup-python@v6
33
33
  with:
34
34
  python-version: '3.11'
35
35
 
@@ -28,6 +28,7 @@ jobs:
28
28
  needs: test-images
29
29
  if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'release' }}
30
30
  runs-on: ubuntu-latest
31
+ timeout-minutes: 360
31
32
  permissions:
32
33
  contents: read
33
34
  packages: write
@@ -62,7 +63,6 @@ jobs:
62
63
  sudo rm -rf /usr/local/lib/node_modules
63
64
  sudo rm -rf /opt/microsoft
64
65
  sudo rm -rf /usr/local/.ghcup
65
- sudo rm -rf /opt/hostedtoolcache
66
66
 
67
67
  # Clean apt
68
68
  sudo apt-get clean
@@ -21,7 +21,7 @@ jobs:
21
21
  enable-cache: true
22
22
 
23
23
  - name: Set up Python
24
- uses: actions/setup-python@v5
24
+ uses: actions/setup-python@v6
25
25
  with:
26
26
  python-version-file: "pyproject.toml"
27
27
 
@@ -66,3 +66,6 @@ yarn-error.log*
66
66
  *.tmp
67
67
  *.temp
68
68
  .tmp/
69
+
70
+ # AI Rules generated files
71
+ .claude/agents/
@@ -5,11 +5,6 @@ repos:
5
5
  - id: commitlint
6
6
  stages: [commit-msg]
7
7
  additional_dependencies: ["@commitlint/config-conventional"]
8
- - repo: https://github.com/Goldziher/ai-rulez
9
- rev: v1.6.1
10
- hooks:
11
- - id: ai-rulez-validate
12
- - id: ai-rulez-generate
13
8
  - repo: https://github.com/pre-commit/pre-commit-hooks
14
9
  rev: v6.0.0
15
10
  hooks:
@@ -37,7 +32,7 @@ repos:
37
32
  hooks:
38
33
  - id: markdownlint-fix
39
34
  - repo: https://github.com/adamchainz/blacken-docs
40
- rev: 1.19.1
35
+ rev: 1.20.0
41
36
  hooks:
42
37
  - id: blacken-docs
43
38
  args: ["--pyi", "--line-length", "130"]
@@ -53,7 +48,7 @@ repos:
53
48
  hooks:
54
49
  - id: pyproject-fmt
55
50
  - repo: https://github.com/astral-sh/ruff-pre-commit
56
- rev: v0.12.11
51
+ rev: v0.13.0
57
52
  hooks:
58
53
  - id: ruff
59
54
  args: ["--fix", "--unsafe-fixes"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.13.2
3
+ Version: 3.14.0
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -31,15 +31,15 @@ Requires-Python: >=3.10
31
31
  Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
- Requires-Dist: html-to-markdown[lxml]>=1.9.1
35
- Requires-Dist: mcp>=1.13.0
34
+ Requires-Dist: html-to-markdown[lxml]>=1.11.0
35
+ Requires-Dist: mcp>=1.14.0
36
36
  Requires-Dist: msgspec>=0.18.0
37
- Requires-Dist: numpy>=1.24.0
37
+ Requires-Dist: numpy>=2.0.0
38
38
  Requires-Dist: playa-pdf>=0.7.0
39
- Requires-Dist: polars>=1.33.0
39
+ Requires-Dist: polars>=1.33.1
40
40
  Requires-Dist: psutil>=7.0.0
41
41
  Requires-Dist: pypdfium2==4.30.0
42
- Requires-Dist: python-calamine>=0.5.2
42
+ Requires-Dist: python-calamine>=0.5.3
43
43
  Requires-Dist: python-pptx>=1.0.2
44
44
  Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
45
45
  Provides-Extra: additional-extensions
@@ -55,17 +55,17 @@ Requires-Dist: keybert>=0.9.0; extra == 'all'
55
55
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
56
56
  Requires-Dist: mailparse>=1.0.15; extra == 'all'
57
57
  Requires-Dist: paddleocr>=3.2.0; extra == 'all'
58
- Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
58
+ Requires-Dist: paddlepaddle>=3.2.0; extra == 'all'
59
59
  Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
60
60
  Requires-Dist: rich>=14.1.0; extra == 'all'
61
- Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
61
+ Requires-Dist: semantic-text-splitter>=0.28.0; extra == 'all'
62
62
  Requires-Dist: setuptools>=80.9.0; extra == 'all'
63
63
  Requires-Dist: spacy>=3.8.7; extra == 'all'
64
64
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
65
65
  Provides-Extra: api
66
66
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
67
67
  Provides-Extra: chunking
68
- Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
68
+ Requires-Dist: semantic-text-splitter>=0.28.0; extra == 'chunking'
69
69
  Provides-Extra: cli
70
70
  Requires-Dist: click>=8.2.1; extra == 'cli'
71
71
  Requires-Dist: rich>=14.1.0; extra == 'cli'
@@ -85,7 +85,7 @@ Provides-Extra: langdetect
85
85
  Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
86
86
  Provides-Extra: paddleocr
87
87
  Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
88
- Requires-Dist: paddlepaddle>=3.1.1; extra == 'paddleocr'
88
+ Requires-Dist: paddlepaddle>=3.2.0; extra == 'paddleocr'
89
89
  Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
90
90
  Description-Content-Type: text/markdown
91
91