kreuzberg 3.6.0__tar.gz → 3.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.docker/Dockerfile +4 -4
  2. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/workflows/publish-docker.yml +26 -5
  3. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/workflows/release.yaml +5 -11
  4. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.pre-commit-config.yaml +2 -2
  5. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/PKG-INFO +19 -14
  6. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/README.md +17 -12
  7. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/pyproject.toml +2 -2
  8. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extraction_test.py +2 -2
  9. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/uv.lock +60 -55
  10. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.commitlintrc +0 -0
  11. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.docker/README.md +0 -0
  12. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.dockerignore +0 -0
  13. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/dependabot.yaml +0 -0
  14. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/workflows/ci.yaml +0 -0
  15. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/workflows/docs.yml +0 -0
  16. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.github/workflows/pr-title.yaml +0 -0
  17. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.gitignore +0 -0
  18. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.gitmodules +0 -0
  19. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/.markdownlint.yaml +0 -0
  20. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/LICENSE +0 -0
  21. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/ai-rulez.yaml +0 -0
  22. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/README.md +0 -0
  23. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/benchmark_baseline.py +0 -0
  24. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/end_to_end_benchmark.py +0 -0
  25. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/final_benchmark.py +0 -0
  26. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/pyproject.toml +0 -0
  27. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/baseline_results.json +0 -0
  28. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
  29. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/comprehensive_caching_results.json +0 -0
  30. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/final_benchmark_results.json +0 -0
  31. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/mime_caching_results.json +0 -0
  32. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/msgspec_caching_results.json +0 -0
  33. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/ocr_caching_results.json +0 -0
  34. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/serialization_benchmark_results.json +0 -0
  35. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/statistical_benchmark_results.json +0 -0
  36. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/results/table_caching_results.json +0 -0
  37. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/serialization_benchmark.py +0 -0
  38. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
  39. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
  40. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
  41. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
  42. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
  43. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
  44. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
  45. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/benchmarks/statistical_benchmark.py +0 -0
  46. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/advanced/custom-extractors.md +0 -0
  47. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/advanced/custom-hooks.md +0 -0
  48. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/advanced/error-handling.md +0 -0
  49. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/advanced/index.md +0 -0
  50. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/advanced/performance.md +0 -0
  51. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/exceptions.md +0 -0
  52. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/extraction-functions.md +0 -0
  53. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/extractor-registry.md +0 -0
  54. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/index.md +0 -0
  55. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/ocr-configuration.md +0 -0
  56. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/api-reference/types.md +0 -0
  57. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/assets/favicon.png +0 -0
  58. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/assets/logo.png +0 -0
  59. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/changelog.md +0 -0
  60. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/cli.md +0 -0
  61. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/contributing.md +0 -0
  62. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/css/extra.css +0 -0
  63. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/examples/extraction-examples.md +0 -0
  64. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/examples/index.md +0 -0
  65. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/getting-started/index.md +0 -0
  66. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/getting-started/installation.md +0 -0
  67. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/getting-started/quick-start.md +0 -0
  68. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/index.md +0 -0
  69. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/api-server.md +0 -0
  70. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/basic-usage.md +0 -0
  71. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/chunking.md +0 -0
  72. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/docker.md +0 -0
  73. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/extraction-configuration.md +0 -0
  74. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/index.md +0 -0
  75. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/metadata-extraction.md +0 -0
  76. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/ocr-backends.md +0 -0
  77. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/ocr-configuration.md +0 -0
  78. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/docs/user-guide/supported-formats.md +0 -0
  79. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/__init__.py +0 -0
  80. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/__main__.py +0 -0
  81. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_api/__init__.py +0 -0
  82. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_api/main.py +0 -0
  83. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_chunker.py +0 -0
  84. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_cli_config.py +0 -0
  85. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_constants.py +0 -0
  86. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_entity_extraction.py +0 -0
  87. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/__init__.py +0 -0
  88. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_base.py +0 -0
  89. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_html.py +0 -0
  90. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_image.py +0 -0
  91. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_pandoc.py +0 -0
  92. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_pdf.py +0 -0
  93. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_presentation.py +0 -0
  94. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  95. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_gmft.py +0 -0
  96. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_language_detection.py +0 -0
  97. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_mime_types.py +0 -0
  98. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/__init__.py +0 -0
  99. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/gmft_isolated.py +0 -0
  100. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/process_manager.py +0 -0
  101. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/sync_easyocr.py +0 -0
  102. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/sync_paddleocr.py +0 -0
  103. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/sync_tesseract.py +0 -0
  104. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_multiprocessing/tesseract_pool.py +0 -0
  105. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_ocr/__init__.py +0 -0
  106. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_ocr/_base.py +0 -0
  107. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_ocr/_easyocr.py +0 -0
  108. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_ocr/_paddleocr.py +0 -0
  109. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_ocr/_tesseract.py +0 -0
  110. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_playa.py +0 -0
  111. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_registry.py +0 -0
  112. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_types.py +0 -0
  113. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/__init__.py +0 -0
  114. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_cache.py +0 -0
  115. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_device.py +0 -0
  116. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_document_cache.py +0 -0
  117. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_errors.py +0 -0
  118. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_pdf_lock.py +0 -0
  119. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_process_pool.py +0 -0
  120. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_serialization.py +0 -0
  121. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_string.py +0 -0
  122. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_sync.py +0 -0
  123. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/_utils/_tmp.py +0 -0
  124. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/cli.py +0 -0
  125. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/exceptions.py +0 -0
  126. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/extraction.py +0 -0
  127. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/kreuzberg/py.typed +0 -0
  128. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/mkdocs.yaml +0 -0
  129. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/__init__.py +0 -0
  130. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/api/__init__.py +0 -0
  131. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/api/main_test.py +0 -0
  132. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/chunker_test.py +0 -0
  133. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/cli_integration_test.py +0 -0
  134. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/cli_test.py +0 -0
  135. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/conftest.py +0 -0
  136. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/entity_extraction_test.py +0 -0
  137. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/exceptions_test.py +0 -0
  138. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extraction_batch_test.py +0 -0
  139. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/__init__.py +0 -0
  140. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/html_test.py +0 -0
  141. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/image_test.py +0 -0
  142. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/pandoc_metadata_test.py +0 -0
  143. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/pandoc_test.py +0 -0
  144. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/pdf_test.py +0 -0
  145. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/presentation_test.py +0 -0
  146. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/extractors/spreed_sheet_test.py +0 -0
  147. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/gmft_extended_test.py +0 -0
  148. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/gmft_test.py +0 -0
  149. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/hooks_test.py +0 -0
  150. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/language_detection_test.py +0 -0
  151. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/mime_types_test.py +0 -0
  152. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/multiprocessing/__init__.py +0 -0
  153. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/multiprocessing/gmft_integration_test.py +0 -0
  154. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/multiprocessing/process_manager_test.py +0 -0
  155. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/multiprocessing/sync_tesseract_test.py +0 -0
  156. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  157. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/__init__.py +0 -0
  158. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/base_test.py +0 -0
  159. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/device_integration_test.py +0 -0
  160. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/easyocr_test.py +0 -0
  161. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/init_test.py +0 -0
  162. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/paddleocr_test.py +0 -0
  163. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/ocr/tesseract_test.py +0 -0
  164. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/playa_test.py +0 -0
  165. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/registry_test.py +0 -0
  166. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/document.docx +0 -0
  167. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  168. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/excel.xlsx +0 -0
  169. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/french-text.txt +0 -0
  170. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/german-text.txt +0 -0
  171. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/html.html +0 -0
  172. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/markdown.md +0 -0
  173. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/non-ascii-text.pdf +0 -0
  174. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/non-searchable.pdf +0 -0
  175. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/ocr-image.jpg +0 -0
  176. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  177. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  178. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  179. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  180. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/sample-contract.pdf +0 -0
  181. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/scanned.pdf +0 -0
  182. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/searchable.pdf +0 -0
  183. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/spanish-text.txt +0 -0
  184. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/test_source_files/test-article.pdf +0 -0
  185. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/types_test.py +0 -0
  186. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/__init__.py +0 -0
  187. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/cache_test.py +0 -0
  188. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/device_test.py +0 -0
  189. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/errors_test.py +0 -0
  190. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/pdf_lock_test.py +0 -0
  191. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/process_pool_test.py +0 -0
  192. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/serialization_test.py +0 -0
  193. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/string_test.py +0 -0
  194. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/sync_test.py +0 -0
  195. {kreuzberg-3.6.0 → kreuzberg-3.6.2}/tests/utils/tmp_test.py +0 -0
@@ -1,8 +1,8 @@
1
- FROM ghcr.io/astral-sh/uv:python3.13-bookworm as app
1
+ FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS app
2
2
  ARG EXTRAS=""
3
3
  WORKDIR /app
4
- ENV PYTHONDONTWRITEBYTECODE 1
5
- ENV PYTHONUNBUFFERED 1
4
+ ENV PYTHONDONTWRITEBYTECODE=1
5
+ ENV PYTHONUNBUFFERED=1
6
6
  ENV UV_LINK_MODE=copy
7
7
 
8
8
  RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -18,4 +18,4 @@ RUN uv sync --extra api${EXTRAS:+ --extra ${EXTRAS}} --no-editable --no-dev --co
18
18
 
19
19
  RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /sbin/nologin appuser
20
20
  USER appuser
21
- CMD ["litestar", "--app", "kreuzberg._api.main:app", "run", "--host", "0.0.0.0"]
21
+ CMD ["/app/.venv/bin/litestar", "--app", "kreuzberg._api.main:app", "run", "--host", "0.0.0.0"]
@@ -3,16 +3,19 @@ name: Publish Docker Images
3
3
 
4
4
  on:
5
5
  workflow_dispatch:
6
+ release:
7
+ types: [published]
6
8
 
7
9
  jobs:
8
10
  build-and-push:
9
11
  runs-on: ubuntu-latest
10
- if: ${{ github.event_name == 'workflow_dispatch' }}
12
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'release' }}
11
13
  permissions:
12
14
  contents: read
13
15
  packages: write
14
16
 
15
17
  strategy:
18
+ max-parallel: 2
16
19
  matrix:
17
20
  include:
18
21
  - name: core
@@ -32,6 +35,16 @@ jobs:
32
35
  tag_suffix: "-all"
33
36
 
34
37
  steps:
38
+ - name: Free up disk space
39
+ run: |
40
+ # Remove large unnecessary packages to free up space
41
+ sudo rm -rf /usr/share/dotnet
42
+ sudo rm -rf /usr/local/lib/android
43
+ sudo rm -rf /opt/ghc
44
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
45
+ sudo docker system prune -af
46
+ df -h
47
+
35
48
  - name: Checkout repository
36
49
  uses: actions/checkout@v4
37
50
  with:
@@ -40,10 +53,16 @@ jobs:
40
53
  - name: Get release version
41
54
  id: get_version
42
55
  run: |
43
- # Get the latest tag by listing all tags
44
- git fetch --tags
45
- VERSION=$(git tag --sort=-version:refname | head -n1)
56
+ if [ "${{ github.event_name }}" = "release" ]; then
57
+ # For release events, use the release tag
58
+ VERSION="${{ github.event.release.tag_name }}"
59
+ else
60
+ # For workflow_dispatch, get the latest tag
61
+ git fetch --tags
62
+ VERSION=$(git tag --sort=-version:refname | head -n1)
63
+ fi
46
64
  echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
65
+ echo "Using version: $VERSION"
47
66
 
48
67
  - name: Set up QEMU
49
68
  uses: docker/setup-qemu-action@v3
@@ -73,12 +92,14 @@ jobs:
73
92
  with:
74
93
  context: .
75
94
  file: ./.docker/Dockerfile
76
- platforms: linux/amd64,linux/arm64
95
+ platforms: ${{ matrix.name == 'all' && 'linux/amd64' || 'linux/amd64,linux/arm64' }}
77
96
  push: true
78
97
  build-args: |
79
98
  EXTRAS=${{ matrix.extras }}
80
99
  tags: ${{ steps.meta.outputs.tags }}
81
100
  labels: ${{ steps.meta.outputs.labels }}
101
+ cache-from: type=gha
102
+ cache-to: type=gha,mode=max
82
103
 
83
104
  - name: Update Docker Hub README
84
105
  uses: peter-evans/dockerhub-description@v4
@@ -10,6 +10,7 @@ jobs:
10
10
  environment: pypi
11
11
  permissions:
12
12
  id-token: write
13
+ contents: read
13
14
  steps:
14
15
  - name: Checkout
15
16
  uses: actions/checkout@v4
@@ -30,14 +31,7 @@ jobs:
30
31
  - name: Publish
31
32
  uses: pypa/gh-action-pypi-publish@release/v1
32
33
 
33
- - name: Trigger Docker Build
34
- uses: actions/github-script@v7
35
- with:
36
- github-token: ${{ secrets.GITHUB_TOKEN }}
37
- script: |
38
- await github.rest.actions.createWorkflowDispatch({
39
- owner: context.repo.owner,
40
- repo: context.repo.repo,
41
- workflow_id: 'publish-docker.yml',
42
- ref: 'main'
43
- });
34
+ - name: Docker Build Info
35
+ run: |
36
+ echo "Docker images will be built automatically by the publish-docker.yml workflow"
37
+ echo "triggered by this release event. No manual triggering needed."
@@ -6,7 +6,7 @@ repos:
6
6
  stages: [commit-msg]
7
7
  additional_dependencies: ["@commitlint/config-conventional"]
8
8
  - repo: https://github.com/Goldziher/ai-rulez
9
- rev: v1.1.2
9
+ rev: v1.1.4
10
10
  hooks:
11
11
  - id: ai-rulez-validate
12
12
  - id: ai-rulez-generate
@@ -53,7 +53,7 @@ repos:
53
53
  hooks:
54
54
  - id: pyproject-fmt
55
55
  - repo: https://github.com/astral-sh/ruff-pre-commit
56
- rev: v0.12.1
56
+ rev: v0.12.2
57
57
  hooks:
58
58
  - id: ruff
59
59
  args: ["--fix", "--unsafe-fixes"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.6.0
3
+ Version: 3.6.2
4
4
  Summary: A text extraction library supporting PDFs, images, office documents and more
5
5
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
6
6
  Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
@@ -25,7 +25,7 @@ Requires-Python: >=3.10
25
25
  Requires-Dist: anyio>=4.9.0
26
26
  Requires-Dist: charset-normalizer>=3.4.2
27
27
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
28
- Requires-Dist: html-to-markdown>=1.4.0
28
+ Requires-Dist: html-to-markdown[lxml]>=1.6.0
29
29
  Requires-Dist: msgspec>=0.18.0
30
30
  Requires-Dist: playa-pdf>=0.6.1
31
31
  Requires-Dist: psutil>=7.0.0
@@ -83,8 +83,8 @@ Description-Content-Type: text/markdown
83
83
 
84
84
  ## Why Kreuzberg?
85
85
 
86
- - **🚀 Fastest Performance**: [Benchmarked](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) as the fastest text extraction library
87
- - **💾 Memory Efficient**: 14x smaller than alternatives (71MB vs 1GB+)
86
+ - **🚀 Fastest Performance**: [35+ files/second](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) - the fastest text extraction library
87
+ - **💾 Memory Efficient**: 14x smaller than alternatives (71MB vs 1GB+) with lowest memory usage (~530MB)
88
88
  - **⚡ Dual APIs**: Only library with both sync and async support
89
89
  - **🔧 Zero Configuration**: Works out of the box with sane defaults
90
90
  - **🏠 Local Processing**: No cloud dependencies or external API calls
@@ -140,13 +140,13 @@ asyncio.run(main())
140
140
 
141
141
  ```bash
142
142
  # Run API server
143
- docker run -p 8000:8000 goldziher/kreuzberg:3.4.0
143
+ docker run -p 8000:8000 goldziher/kreuzberg:latest
144
144
 
145
145
  # Extract files
146
146
  curl -X POST http://localhost:8000/extract -F "data=@document.pdf"
147
147
  ```
148
148
 
149
- Available variants: `3.4.0`, `3.4.0-easyocr`, `3.4.0-paddle`, `3.4.0-gmft`, `3.4.0-all`
149
+ Available variants: `latest`, `3.6.1`, `3.6.1-easyocr`, `3.6.1-paddle`, `3.6.1-gmft`, `3.6.1-all`
150
150
 
151
151
  ### 🌐 REST API
152
152
 
@@ -191,15 +191,20 @@ kreuzberg extract *.pdf --output-dir ./extracted/
191
191
 
192
192
  ## Performance
193
193
 
194
- **Fastest extraction speeds** with minimal resource usage:
194
+ **[Comprehensive benchmarks](https://goldziher.github.io/python-text-extraction-libs-benchmarks/)** across 94 real-world documents (~210MB) • [View source](https://github.com/Goldziher/python-text-extraction-libs-benchmarks):
195
195
 
196
- | Library | Speed | Memory | Size | Success Rate |
197
- | ------------- | -------------- | ------------- | ----------- | ------------ |
198
- | **Kreuzberg** | **Fastest** | 💾 **Lowest** | 📦 **71MB** | **100%** |
199
- | Unstructured | 2-3x slower | 2x higher | 146MB | 95% |
200
- | MarkItDown | 3-4x slower | 3x higher | 251MB | 90% |
201
- | Docling | 4-5x slower | 10x higher | 1,032MB | 85% |
196
+ | Library | Speed | Memory | Install Size | Dependencies | Success Rate |
197
+ | ------------- | --------------- | --------- | ------------ | ------------ | ------------ |
198
+ | **Kreuzberg** | **35+ files/s** | **530MB** | **71MB** | **20** | High\* |
199
+ | Unstructured | Moderate | ~1GB | 146MB | 54 | 88%+ |
200
+ | MarkItDown | Good† | ~1.5GB | 251MB | 25 | 80%† |
201
+ | Docling | 60+ min/file‡ | ~5GB | 1,032MB | 88 | Low‡ |
202
202
 
203
+ \*_Can achieve 75% reliability with 15% performance trade-off when configured_
204
+ †_Good on simple documents, struggles with large/complex files (>10MB)_
205
+ ‡_Frequently fails/times out on medium files (>1MB)_
206
+
207
+ > **Benchmark details**: Tested across PDFs, Word docs, HTML, images, spreadsheets in 6 languages (English, Hebrew, German, Chinese, Japanese, Korean)
203
208
  > **Rule of thumb**: Use async API for complex documents and batch processing (up to 4.5x faster)
204
209
 
205
210
  ## Documentation
@@ -233,7 +238,7 @@ ______________________________________________________________________
233
238
 
234
239
  <div align="center">
235
240
 
236
- **[Documentation](https://goldziher.github.io/kreuzberg/) • [PyPI](https://pypi.org/project/kreuzberg/) • [Docker Hub](https://hub.docker.com/r/goldziher/kreuzberg) • [Discord](https://discord.gg/pXxagNK2zN)**
241
+ **[Documentation](https://goldziher.github.io/kreuzberg/) • [PyPI](https://pypi.org/project/kreuzberg/) • [Docker Hub](https://hub.docker.com/r/goldziher/kreuzberg) • [Benchmarks](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) • [Discord](https://discord.gg/pXxagNK2zN)**
237
242
 
238
243
  Made with ❤️ by the [Kreuzberg contributors](https://github.com/Goldziher/kreuzberg/graphs/contributors)
239
244
 
@@ -11,8 +11,8 @@
11
11
 
12
12
  ## Why Kreuzberg?
13
13
 
14
- - **🚀 Fastest Performance**: [Benchmarked](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) as the fastest text extraction library
15
- - **💾 Memory Efficient**: 14x smaller than alternatives (71MB vs 1GB+)
14
+ - **🚀 Fastest Performance**: [35+ files/second](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) - the fastest text extraction library
15
+ - **💾 Memory Efficient**: 14x smaller than alternatives (71MB vs 1GB+) with lowest memory usage (~530MB)
16
16
  - **⚡ Dual APIs**: Only library with both sync and async support
17
17
  - **🔧 Zero Configuration**: Works out of the box with sane defaults
18
18
  - **🏠 Local Processing**: No cloud dependencies or external API calls
@@ -68,13 +68,13 @@ asyncio.run(main())
68
68
 
69
69
  ```bash
70
70
  # Run API server
71
- docker run -p 8000:8000 goldziher/kreuzberg:3.4.0
71
+ docker run -p 8000:8000 goldziher/kreuzberg:latest
72
72
 
73
73
  # Extract files
74
74
  curl -X POST http://localhost:8000/extract -F "data=@document.pdf"
75
75
  ```
76
76
 
77
- Available variants: `3.4.0`, `3.4.0-easyocr`, `3.4.0-paddle`, `3.4.0-gmft`, `3.4.0-all`
77
+ Available variants: `latest`, `3.6.1`, `3.6.1-easyocr`, `3.6.1-paddle`, `3.6.1-gmft`, `3.6.1-all`
78
78
 
79
79
  ### 🌐 REST API
80
80
 
@@ -119,15 +119,20 @@ kreuzberg extract *.pdf --output-dir ./extracted/
119
119
 
120
120
  ## Performance
121
121
 
122
- **Fastest extraction speeds** with minimal resource usage:
122
+ **[Comprehensive benchmarks](https://goldziher.github.io/python-text-extraction-libs-benchmarks/)** across 94 real-world documents (~210MB) • [View source](https://github.com/Goldziher/python-text-extraction-libs-benchmarks):
123
123
 
124
- | Library | Speed | Memory | Size | Success Rate |
125
- | ------------- | -------------- | ------------- | ----------- | ------------ |
126
- | **Kreuzberg** | **Fastest** | 💾 **Lowest** | 📦 **71MB** | **100%** |
127
- | Unstructured | 2-3x slower | 2x higher | 146MB | 95% |
128
- | MarkItDown | 3-4x slower | 3x higher | 251MB | 90% |
129
- | Docling | 4-5x slower | 10x higher | 1,032MB | 85% |
124
+ | Library | Speed | Memory | Install Size | Dependencies | Success Rate |
125
+ | ------------- | --------------- | --------- | ------------ | ------------ | ------------ |
126
+ | **Kreuzberg** | **35+ files/s** | **530MB** | **71MB** | **20** | High\* |
127
+ | Unstructured | Moderate | ~1GB | 146MB | 54 | 88%+ |
128
+ | MarkItDown | Good† | ~1.5GB | 251MB | 25 | 80%† |
129
+ | Docling | 60+ min/file‡ | ~5GB | 1,032MB | 88 | Low‡ |
130
130
 
131
+ \*_Can achieve 75% reliability with 15% performance trade-off when configured_
132
+ †_Good on simple documents, struggles with large/complex files (>10MB)_
133
+ ‡_Frequently fails/times out on medium files (>1MB)_
134
+
135
+ > **Benchmark details**: Tested across PDFs, Word docs, HTML, images, spreadsheets in 6 languages (English, Hebrew, German, Chinese, Japanese, Korean)
131
136
  > **Rule of thumb**: Use async API for complex documents and batch processing (up to 4.5x faster)
132
137
 
133
138
  ## Documentation
@@ -161,7 +166,7 @@ ______________________________________________________________________
161
166
 
162
167
  <div align="center">
163
168
 
164
- **[Documentation](https://goldziher.github.io/kreuzberg/) • [PyPI](https://pypi.org/project/kreuzberg/) • [Docker Hub](https://hub.docker.com/r/goldziher/kreuzberg) • [Discord](https://discord.gg/pXxagNK2zN)**
169
+ **[Documentation](https://goldziher.github.io/kreuzberg/) • [PyPI](https://pypi.org/project/kreuzberg/) • [Docker Hub](https://hub.docker.com/r/goldziher/kreuzberg) • [Benchmarks](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) • [Discord](https://discord.gg/pXxagNK2zN)**
165
170
 
166
171
  Made with ❤️ by the [Kreuzberg contributors](https://github.com/Goldziher/kreuzberg/graphs/contributors)
167
172
 
@@ -5,7 +5,7 @@ requires = [ "hatchling" ]
5
5
 
6
6
  [project]
7
7
  name = "kreuzberg"
8
- version = "3.6.0"
8
+ version = "3.6.2"
9
9
  description = "A text extraction library supporting PDFs, images, office documents and more"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -49,7 +49,7 @@ dependencies = [
49
49
  "anyio>=4.9.0",
50
50
  "charset-normalizer>=3.4.2",
51
51
  "exceptiongroup>=1.2.2; python_version<'3.11'",
52
- "html-to-markdown>=1.4.0",
52
+ "html-to-markdown[lxml]>=1.6.0",
53
53
  "msgspec>=0.18.0",
54
54
  "playa-pdf>=0.6.1", # pinned due to breaking changes in 0.5.0
55
55
  "psutil>=7.0.0",
@@ -100,7 +100,7 @@ async def test_extract_bytes_html(html_document: Path) -> None:
100
100
  assert_extraction_result(result, mime_type=MARKDOWN_MIME_TYPE)
101
101
  assert (
102
102
  result.content
103
- == "Browsers usually insert quotation marks around the q element. WWF's goal is to: Build a future where people live in harmony with nature."
103
+ == 'Browsers usually insert quotation marks around the q element. WWF\'s goal is to: "Build a future where people live in harmony with nature."'
104
104
  )
105
105
 
106
106
 
@@ -169,7 +169,7 @@ async def test_extract_file_html(html_document: Path) -> None:
169
169
  assert_extraction_result(result, mime_type=MARKDOWN_MIME_TYPE)
170
170
  assert (
171
171
  result.content
172
- == "Browsers usually insert quotation marks around the q element. WWF's goal is to: Build a future where people live in harmony with nature."
172
+ == 'Browsers usually insert quotation marks around the q element. WWF\'s goal is to: "Build a future where people live in harmony with nature."'
173
173
  )
174
174
 
175
175
 
@@ -24,7 +24,7 @@ wheels = [
24
24
 
25
25
  [[package]]
26
26
  name = "aiohttp"
27
- version = "3.12.13"
27
+ version = "3.12.14"
28
28
  source = { registry = "https://pypi.org/simple" }
29
29
  dependencies = [
30
30
  { name = "aiohappyeyeballs" },
@@ -35,25 +35,25 @@ dependencies = [
35
35
  { name = "propcache" },
36
36
  { name = "yarl" },
37
37
  ]
38
- sdist = { url = "https://files.pythonhosted.org/packages/42/6e/ab88e7cb2a4058bed2f7870276454f85a7c56cd6da79349eb314fc7bbcaa/aiohttp-3.12.13.tar.gz", hash = "sha256:47e2da578528264a12e4e3dd8dd72a7289e5f812758fe086473fab037a10fcce", size = 7819160, upload-time = "2025-06-14T15:15:41.354Z" }
38
+ sdist = { url = "https://files.pythonhosted.org/packages/e6/0b/e39ad954107ebf213a2325038a3e7a506be3d98e1435e1f82086eec4cde2/aiohttp-3.12.14.tar.gz", hash = "sha256:6e06e120e34d93100de448fd941522e11dafa78ef1a893c179901b7d66aa29f2", size = 7822921, upload-time = "2025-07-10T13:05:33.968Z" }
39
39
  wheels = [
40
- { url = "https://files.pythonhosted.org/packages/11/0f/db19abdf2d86aa1deec3c1e0e5ea46a587b97c07a16516b6438428b3a3f8/aiohttp-3.12.13-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d4a18e61f271127465bdb0e8ff36e8f02ac4a32a80d8927aa52371e93cd87938", size = 694910, upload-time = "2025-06-14T15:14:30.604Z" },
41
- { url = "https://files.pythonhosted.org/packages/d5/81/0ab551e1b5d7f1339e2d6eb482456ccbe9025605b28eed2b1c0203aaaade/aiohttp-3.12.13-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:532542cb48691179455fab429cdb0d558b5e5290b033b87478f2aa6af5d20ace", size = 472566, upload-time = "2025-06-14T15:14:32.275Z" },
42
- { url = "https://files.pythonhosted.org/packages/34/3f/6b7d336663337672d29b1f82d1f252ec1a040fe2d548f709d3f90fa2218a/aiohttp-3.12.13-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d7eea18b52f23c050ae9db5d01f3d264ab08f09e7356d6f68e3f3ac2de9dfabb", size = 464856, upload-time = "2025-06-14T15:14:34.132Z" },
43
- { url = "https://files.pythonhosted.org/packages/26/7f/32ca0f170496aa2ab9b812630fac0c2372c531b797e1deb3deb4cea904bd/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad7c8e5c25f2a26842a7c239de3f7b6bfb92304593ef997c04ac49fb703ff4d7", size = 1703683, upload-time = "2025-06-14T15:14:36.034Z" },
44
- { url = "https://files.pythonhosted.org/packages/ec/53/d5513624b33a811c0abea8461e30a732294112318276ce3dbf047dbd9d8b/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6af355b483e3fe9d7336d84539fef460120c2f6e50e06c658fe2907c69262d6b", size = 1684946, upload-time = "2025-06-14T15:14:38Z" },
45
- { url = "https://files.pythonhosted.org/packages/37/72/4c237dd127827b0247dc138d3ebd49c2ded6114c6991bbe969058575f25f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a95cf9f097498f35c88e3609f55bb47b28a5ef67f6888f4390b3d73e2bac6177", size = 1737017, upload-time = "2025-06-14T15:14:39.951Z" },
46
- { url = "https://files.pythonhosted.org/packages/0d/67/8a7eb3afa01e9d0acc26e1ef847c1a9111f8b42b82955fcd9faeb84edeb4/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8ed8c38a1c584fe99a475a8f60eefc0b682ea413a84c6ce769bb19a7ff1c5ef", size = 1786390, upload-time = "2025-06-14T15:14:42.151Z" },
47
- { url = "https://files.pythonhosted.org/packages/48/19/0377df97dd0176ad23cd8cad4fd4232cfeadcec6c1b7f036315305c98e3f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0b9170d5d800126b5bc89d3053a2363406d6e327afb6afaeda2d19ee8bb103", size = 1708719, upload-time = "2025-06-14T15:14:44.039Z" },
48
- { url = "https://files.pythonhosted.org/packages/61/97/ade1982a5c642b45f3622255173e40c3eed289c169f89d00eeac29a89906/aiohttp-3.12.13-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:372feeace612ef8eb41f05ae014a92121a512bd5067db8f25101dd88a8db11da", size = 1622424, upload-time = "2025-06-14T15:14:45.945Z" },
49
- { url = "https://files.pythonhosted.org/packages/99/ab/00ad3eea004e1d07ccc406e44cfe2b8da5acb72f8c66aeeb11a096798868/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a946d3702f7965d81f7af7ea8fb03bb33fe53d311df48a46eeca17e9e0beed2d", size = 1675447, upload-time = "2025-06-14T15:14:47.911Z" },
50
- { url = "https://files.pythonhosted.org/packages/3f/fe/74e5ce8b2ccaba445fe0087abc201bfd7259431d92ae608f684fcac5d143/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a0c4725fae86555bbb1d4082129e21de7264f4ab14baf735278c974785cd2041", size = 1707110, upload-time = "2025-06-14T15:14:50.334Z" },
51
- { url = "https://files.pythonhosted.org/packages/ef/c4/39af17807f694f7a267bd8ab1fbacf16ad66740862192a6c8abac2bff813/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9b28ea2f708234f0a5c44eb6c7d9eb63a148ce3252ba0140d050b091b6e842d1", size = 1649706, upload-time = "2025-06-14T15:14:52.378Z" },
52
- { url = "https://files.pythonhosted.org/packages/38/e8/f5a0a5f44f19f171d8477059aa5f28a158d7d57fe1a46c553e231f698435/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d4f5becd2a5791829f79608c6f3dc745388162376f310eb9c142c985f9441cc1", size = 1725839, upload-time = "2025-06-14T15:14:54.617Z" },
53
- { url = "https://files.pythonhosted.org/packages/fd/ac/81acc594c7f529ef4419d3866913f628cd4fa9cab17f7bf410a5c3c04c53/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:60f2ce6b944e97649051d5f5cc0f439360690b73909230e107fd45a359d3e911", size = 1759311, upload-time = "2025-06-14T15:14:56.597Z" },
54
- { url = "https://files.pythonhosted.org/packages/38/0d/aabe636bd25c6ab7b18825e5a97d40024da75152bec39aa6ac8b7a677630/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:69fc1909857401b67bf599c793f2183fbc4804717388b0b888f27f9929aa41f3", size = 1708202, upload-time = "2025-06-14T15:14:58.598Z" },
55
- { url = "https://files.pythonhosted.org/packages/1f/ab/561ef2d8a223261683fb95a6283ad0d36cb66c87503f3a7dde7afe208bb2/aiohttp-3.12.13-cp313-cp313-win32.whl", hash = "sha256:7d7e68787a2046b0e44ba5587aa723ce05d711e3a3665b6b7545328ac8e3c0dd", size = 420794, upload-time = "2025-06-14T15:15:00.939Z" },
56
- { url = "https://files.pythonhosted.org/packages/9d/47/b11d0089875a23bff0abd3edb5516bcd454db3fefab8604f5e4b07bd6210/aiohttp-3.12.13-cp313-cp313-win_amd64.whl", hash = "sha256:5a178390ca90419bfd41419a809688c368e63c86bd725e1186dd97f6b89c2706", size = 446735, upload-time = "2025-06-14T15:15:02.858Z" },
40
+ { url = "https://files.pythonhosted.org/packages/06/48/e0d2fa8ac778008071e7b79b93ab31ef14ab88804d7ba71b5c964a7c844e/aiohttp-3.12.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3143a7893d94dc82bc409f7308bc10d60285a3cd831a68faf1aa0836c5c3c767", size = 695471, upload-time = "2025-07-10T13:04:20.124Z" },
41
+ { url = "https://files.pythonhosted.org/packages/8d/e7/f73206afa33100804f790b71092888f47df65fd9a4cd0e6800d7c6826441/aiohttp-3.12.14-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3d62ac3d506cef54b355bd34c2a7c230eb693880001dfcda0bf88b38f5d7af7e", size = 473128, upload-time = "2025-07-10T13:04:21.928Z" },
42
+ { url = "https://files.pythonhosted.org/packages/df/e2/4dd00180be551a6e7ee979c20fc7c32727f4889ee3fd5b0586e0d47f30e1/aiohttp-3.12.14-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48e43e075c6a438937c4de48ec30fa8ad8e6dfef122a038847456bfe7b947b63", size = 465426, upload-time = "2025-07-10T13:04:24.071Z" },
43
+ { url = "https://files.pythonhosted.org/packages/de/dd/525ed198a0bb674a323e93e4d928443a680860802c44fa7922d39436b48b/aiohttp-3.12.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077b4488411a9724cecc436cbc8c133e0d61e694995b8de51aaf351c7578949d", size = 1704252, upload-time = "2025-07-10T13:04:26.049Z" },
44
+ { url = "https://files.pythonhosted.org/packages/d8/b1/01e542aed560a968f692ab4fc4323286e8bc4daae83348cd63588e4f33e3/aiohttp-3.12.14-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d8c35632575653f297dcbc9546305b2c1133391089ab925a6a3706dfa775ccab", size = 1685514, upload-time = "2025-07-10T13:04:28.186Z" },
45
+ { url = "https://files.pythonhosted.org/packages/b3/06/93669694dc5fdabdc01338791e70452d60ce21ea0946a878715688d5a191/aiohttp-3.12.14-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b8ce87963f0035c6834b28f061df90cf525ff7c9b6283a8ac23acee6502afd4", size = 1737586, upload-time = "2025-07-10T13:04:30.195Z" },
46
+ { url = "https://files.pythonhosted.org/packages/a5/3a/18991048ffc1407ca51efb49ba8bcc1645961f97f563a6c480cdf0286310/aiohttp-3.12.14-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a2cf66e32a2563bb0766eb24eae7e9a269ac0dc48db0aae90b575dc9583026", size = 1786958, upload-time = "2025-07-10T13:04:32.482Z" },
47
+ { url = "https://files.pythonhosted.org/packages/30/a8/81e237f89a32029f9b4a805af6dffc378f8459c7b9942712c809ff9e76e5/aiohttp-3.12.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdea089caf6d5cde975084a884c72d901e36ef9c2fd972c9f51efbbc64e96fbd", size = 1709287, upload-time = "2025-07-10T13:04:34.493Z" },
48
+ { url = "https://files.pythonhosted.org/packages/8c/e3/bd67a11b0fe7fc12c6030473afd9e44223d456f500f7cf526dbaa259ae46/aiohttp-3.12.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7865f27db67d49e81d463da64a59365ebd6b826e0e4847aa111056dcb9dc88", size = 1622990, upload-time = "2025-07-10T13:04:36.433Z" },
49
+ { url = "https://files.pythonhosted.org/packages/83/ba/e0cc8e0f0d9ce0904e3cf2d6fa41904e379e718a013c721b781d53dcbcca/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0ab5b38a6a39781d77713ad930cb5e7feea6f253de656a5f9f281a8f5931b086", size = 1676015, upload-time = "2025-07-10T13:04:38.958Z" },
50
+ { url = "https://files.pythonhosted.org/packages/d8/b3/1e6c960520bda094c48b56de29a3d978254637ace7168dd97ddc273d0d6c/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9b3b15acee5c17e8848d90a4ebc27853f37077ba6aec4d8cb4dbbea56d156933", size = 1707678, upload-time = "2025-07-10T13:04:41.275Z" },
51
+ { url = "https://files.pythonhosted.org/packages/0a/19/929a3eb8c35b7f9f076a462eaa9830b32c7f27d3395397665caa5e975614/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e4c972b0bdaac167c1e53e16a16101b17c6d0ed7eac178e653a07b9f7fad7151", size = 1650274, upload-time = "2025-07-10T13:04:43.483Z" },
52
+ { url = "https://files.pythonhosted.org/packages/22/e5/81682a6f20dd1b18ce3d747de8eba11cbef9b270f567426ff7880b096b48/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7442488b0039257a3bdbc55f7209587911f143fca11df9869578db6c26feeeb8", size = 1726408, upload-time = "2025-07-10T13:04:45.577Z" },
53
+ { url = "https://files.pythonhosted.org/packages/8c/17/884938dffaa4048302985483f77dfce5ac18339aad9b04ad4aaa5e32b028/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f68d3067eecb64c5e9bab4a26aa11bd676f4c70eea9ef6536b0a4e490639add3", size = 1759879, upload-time = "2025-07-10T13:04:47.663Z" },
54
+ { url = "https://files.pythonhosted.org/packages/95/78/53b081980f50b5cf874359bde707a6eacd6c4be3f5f5c93937e48c9d0025/aiohttp-3.12.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f88d3704c8b3d598a08ad17d06006cb1ca52a1182291f04979e305c8be6c9758", size = 1708770, upload-time = "2025-07-10T13:04:49.944Z" },
55
+ { url = "https://files.pythonhosted.org/packages/ed/91/228eeddb008ecbe3ffa6c77b440597fdf640307162f0c6488e72c5a2d112/aiohttp-3.12.14-cp313-cp313-win32.whl", hash = "sha256:a3c99ab19c7bf375c4ae3debd91ca5d394b98b6089a03231d4c580ef3c2ae4c5", size = 421688, upload-time = "2025-07-10T13:04:51.993Z" },
56
+ { url = "https://files.pythonhosted.org/packages/66/5f/8427618903343402fdafe2850738f735fd1d9409d2a8f9bcaae5e630d3ba/aiohttp-3.12.14-cp313-cp313-win_amd64.whl", hash = "sha256:3f8aad695e12edc9d571f878c62bedc91adf30c760c8632f09663e5f564f4baa", size = 448098, upload-time = "2025-07-10T13:04:53.999Z" },
57
57
  ]
58
58
 
59
59
  [[package]]
@@ -92,11 +92,11 @@ wheels = [
92
92
 
93
93
  [[package]]
94
94
  name = "asgiref"
95
- version = "3.9.0"
95
+ version = "3.9.1"
96
96
  source = { registry = "https://pypi.org/simple" }
97
- sdist = { url = "https://files.pythonhosted.org/packages/6a/68/fb4fb78c9eac59d5e819108a57664737f855c5a8e9b76aec1738bb137f9e/asgiref-3.9.0.tar.gz", hash = "sha256:3dd2556d0f08c4fab8a010d9ab05ef8c34565f6bf32381d17505f7ca5b273767", size = 36772, upload-time = "2025-07-03T13:25:01.491Z" }
97
+ sdist = { url = "https://files.pythonhosted.org/packages/90/61/0aa957eec22ff70b830b22ff91f825e70e1ef732c06666a805730f28b36b/asgiref-3.9.1.tar.gz", hash = "sha256:a5ab6582236218e5ef1648f242fd9f10626cfd4de8dc377db215d5d5098e3142", size = 36870, upload-time = "2025-07-08T09:07:43.344Z" }
98
98
  wheels = [
99
- { url = "https://files.pythonhosted.org/packages/3d/f9/76c9f4d4985b5a642926162e2d41fe6019b1fa929cfa58abb7d2dc9041e5/asgiref-3.9.0-py3-none-any.whl", hash = "sha256:06a41250a0114d2b6f6a2cb3ab962147d355b53d1de15eebc34a9d04a7b79981", size = 23788, upload-time = "2025-07-03T13:24:59.115Z" },
99
+ { url = "https://files.pythonhosted.org/packages/7c/3c/0464dcada90d5da0e71018c04a140ad6349558afb30b3051b4264cc5b965/asgiref-3.9.1-py3-none-any.whl", hash = "sha256:f3bba7092a48005b5f5bacd747d36ee4a5a61f4a269a6df590b43144355ebd2c", size = 23790, upload-time = "2025-07-08T09:07:41.548Z" },
100
100
  ]
101
101
 
102
102
  [[package]]
@@ -212,11 +212,11 @@ wheels = [
212
212
 
213
213
  [[package]]
214
214
  name = "certifi"
215
- version = "2025.6.15"
215
+ version = "2025.7.9"
216
216
  source = { registry = "https://pypi.org/simple" }
217
- sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" }
217
+ sdist = { url = "https://files.pythonhosted.org/packages/de/8a/c729b6b60c66a38f590c4e774decc4b2ec7b0576be8f1aa984a53ffa812a/certifi-2025.7.9.tar.gz", hash = "sha256:c1d2ec05395148ee10cf672ffc28cd37ea0ab0d99f9cc74c43e588cbd111b079", size = 160386, upload-time = "2025-07-09T02:13:58.874Z" }
218
218
  wheels = [
219
- { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" },
219
+ { url = "https://files.pythonhosted.org/packages/66/f3/80a3f974c8b535d394ff960a11ac20368e06b736da395b551a49ce950cce/certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39", size = 159230, upload-time = "2025-07-09T02:13:57.007Z" },
220
220
  ]
221
221
 
222
222
  [[package]]
@@ -866,14 +866,19 @@ wheels = [
866
866
 
867
867
  [[package]]
868
868
  name = "html-to-markdown"
869
- version = "1.4.0"
869
+ version = "1.6.0"
870
870
  source = { registry = "https://pypi.org/simple" }
871
871
  dependencies = [
872
872
  { name = "beautifulsoup4" },
873
873
  ]
874
- sdist = { url = "https://files.pythonhosted.org/packages/67/62/c48b6e6cff38b9983544bef286f15f0ad249e8a5a09c6705a372bc0badd7/html_to_markdown-1.4.0.tar.gz", hash = "sha256:aced18e152f92c5a875e1ad2902b0c27527fda0b50376671941e3e399cf4d236", size = 16233, upload-time = "2025-06-23T15:52:06.059Z" }
874
+ sdist = { url = "https://files.pythonhosted.org/packages/6b/33/041a35156641991d6805af0847e82467c69a6071738e982379ff9a55151b/html_to_markdown-1.6.0.tar.gz", hash = "sha256:b2916f9d78f4faac866935688ff3f05f7e0a873554f9d797b8bca0e32afdc921", size = 35099, upload-time = "2025-07-11T06:14:12.227Z" }
875
875
  wheels = [
876
- { url = "https://files.pythonhosted.org/packages/bd/ac/38f0f75c8266f2598914f2118371d05faf6b37a5b2bd27c49135630254ba/html_to_markdown-1.4.0-py3-none-any.whl", hash = "sha256:0e3117740d3c92887995c5a251b52a9dbe9f16caaaca56bdf100fe01b227c70b", size = 15451, upload-time = "2025-06-23T15:52:04.6Z" },
876
+ { url = "https://files.pythonhosted.org/packages/c9/23/38e07928c22dd1facd00781758bc4efb5f94c0240aa59e22b9a290fe44a1/html_to_markdown-1.6.0-py3-none-any.whl", hash = "sha256:88848e851ea80719397fc5356018c367d415bbd1ace030f3ad380fafd49fa0ed", size = 31616, upload-time = "2025-07-11T06:14:10.544Z" },
877
+ ]
878
+
879
+ [package.optional-dependencies]
880
+ lxml = [
881
+ { name = "lxml" },
877
882
  ]
878
883
 
879
884
  [[package]]
@@ -1166,12 +1171,12 @@ wheels = [
1166
1171
 
1167
1172
  [[package]]
1168
1173
  name = "kreuzberg"
1169
- version = "3.6.0"
1174
+ version = "3.6.2"
1170
1175
  source = { editable = "." }
1171
1176
  dependencies = [
1172
1177
  { name = "anyio" },
1173
1178
  { name = "charset-normalizer" },
1174
- { name = "html-to-markdown" },
1179
+ { name = "html-to-markdown", extra = ["lxml"] },
1175
1180
  { name = "msgspec" },
1176
1181
  { name = "playa-pdf" },
1177
1182
  { name = "psutil" },
@@ -1254,7 +1259,7 @@ requires-dist = [
1254
1259
  { name = "exceptiongroup", marker = "python_full_version < '3.11'", specifier = ">=1.2.2" },
1255
1260
  { name = "fast-langdetect", marker = "extra == 'langdetect'", specifier = ">=0.3.2" },
1256
1261
  { name = "gmft", marker = "extra == 'gmft'", specifier = ">=0.4.2" },
1257
- { name = "html-to-markdown", specifier = ">=1.4.0" },
1262
+ { name = "html-to-markdown", extras = ["lxml"], specifier = ">=1.6.0" },
1258
1263
  { name = "keybert", marker = "extra == 'entity-extraction'", specifier = ">=0.9.0" },
1259
1264
  { name = "kreuzberg", extras = ["api", "chunking", "cli", "easyocr", "entity-extraction", "gmft", "langdetect", "paddleocr"], marker = "extra == 'all'" },
1260
1265
  { name = "litestar", extras = ["standard", "structlog", "opentelemetry"], marker = "extra == 'api'", specifier = ">=2.16.0" },
@@ -1422,7 +1427,7 @@ wheels = [
1422
1427
 
1423
1428
  [[package]]
1424
1429
  name = "langsmith"
1425
- version = "0.4.4"
1430
+ version = "0.4.5"
1426
1431
  source = { registry = "https://pypi.org/simple" }
1427
1432
  dependencies = [
1428
1433
  { name = "httpx" },
@@ -1433,9 +1438,9 @@ dependencies = [
1433
1438
  { name = "requests-toolbelt" },
1434
1439
  { name = "zstandard" },
1435
1440
  ]
1436
- sdist = { url = "https://files.pythonhosted.org/packages/20/c8/8d2e0fc438d2d3d8d4300f7684ea30a754344ed00d7ba9cc2705241d2a5f/langsmith-0.4.4.tar.gz", hash = "sha256:70c53bbff24a7872e88e6fa0af98270f4986a6e364f9e85db1cc5636defa4d66", size = 352105, upload-time = "2025-06-27T19:20:36.207Z" }
1441
+ sdist = { url = "https://files.pythonhosted.org/packages/5c/92/7885823f3d13222f57773921f0da19b37d628c64607491233dc853a0f6ea/langsmith-0.4.5.tar.gz", hash = "sha256:49444bd8ccd4e46402f1b9ff1d686fa8e3a31b175e7085e72175ab8ec6164a34", size = 352235, upload-time = "2025-07-10T22:08:04.505Z" }
1437
1442
  wheels = [
1438
- { url = "https://files.pythonhosted.org/packages/1d/33/a3337eb70d795495a299a1640d7a75f17fb917155a64309b96106e7b9452/langsmith-0.4.4-py3-none-any.whl", hash = "sha256:014c68329bd085bd6c770a6405c61bb6881f82eb554ce8c4d1984b0035fd1716", size = 367687, upload-time = "2025-06-27T19:20:33.839Z" },
1443
+ { url = "https://files.pythonhosted.org/packages/c8/10/ad3107b666c3203b7938d10ea6b8746b9735c399cf737a51386d58e41d34/langsmith-0.4.5-py3-none-any.whl", hash = "sha256:4167717a2cccc4dff5809dbddc439628e836f6fd13d4fdb31ea013bc8d5cfaf5", size = 367795, upload-time = "2025-07-10T22:08:02.548Z" },
1439
1444
  ]
1440
1445
 
1441
1446
  [[package]]
@@ -2170,7 +2175,7 @@ wheels = [
2170
2175
 
2171
2176
  [[package]]
2172
2177
  name = "openai"
2173
- version = "1.93.0"
2178
+ version = "1.95.0"
2174
2179
  source = { registry = "https://pypi.org/simple" }
2175
2180
  dependencies = [
2176
2181
  { name = "anyio" },
@@ -2182,9 +2187,9 @@ dependencies = [
2182
2187
  { name = "tqdm" },
2183
2188
  { name = "typing-extensions" },
2184
2189
  ]
2185
- sdist = { url = "https://files.pythonhosted.org/packages/e4/d7/e91c6a9cf71726420cddf539852ee4c29176ebb716a702d9118d0409fd8e/openai-1.93.0.tar.gz", hash = "sha256:988f31ade95e1ff0585af11cc5a64510225e4f5cd392698c675d0a9265b8e337", size = 486573, upload-time = "2025-06-27T21:21:39.421Z" }
2190
+ sdist = { url = "https://files.pythonhosted.org/packages/ef/2f/0c6f509a1585545962bfa6e201d7fb658eb2a6f52fb8c26765632d91706c/openai-1.95.0.tar.gz", hash = "sha256:54bc42df9f7142312647dd485d34cca5df20af825fa64a30ca55164be2cf4cc9", size = 488144, upload-time = "2025-07-10T18:35:49.946Z" }
2186
2191
  wheels = [
2187
- { url = "https://files.pythonhosted.org/packages/64/46/a10d9df4673df56f71201d129ba1cb19eaff3366d08c8664d61a7df52e65/openai-1.93.0-py3-none-any.whl", hash = "sha256:3d746fe5498f0dd72e0d9ab706f26c91c0f646bf7459e5629af8ba7c9dbdf090", size = 755038, upload-time = "2025-06-27T21:21:37.532Z" },
2192
+ { url = "https://files.pythonhosted.org/packages/19/a5/57d0bb58b938a3e3f352ff26e645da1660436402a6ad1b29780d261cc5a5/openai-1.95.0-py3-none-any.whl", hash = "sha256:a7afc9dca7e7d616371842af8ea6dbfbcb739a85d183f5f664ab1cc311b9ef18", size = 755572, upload-time = "2025-07-10T18:35:47.507Z" },
2188
2193
  ]
2189
2194
 
2190
2195
  [[package]]
@@ -2392,7 +2397,7 @@ wheels = [
2392
2397
 
2393
2398
  [[package]]
2394
2399
  name = "paddlex"
2395
- version = "3.1.1"
2400
+ version = "3.1.2"
2396
2401
  source = { registry = "https://pypi.org/simple" }
2397
2402
  dependencies = [
2398
2403
  { name = "chardet" },
@@ -2413,7 +2418,7 @@ dependencies = [
2413
2418
  { name = "ujson" },
2414
2419
  ]
2415
2420
  wheels = [
2416
- { url = "https://files.pythonhosted.org/packages/53/3f/d474c8e0a221efae136e052f722d9e1e71e09001c34412a825b216753a0b/paddlex-3.1.1-py3-none-any.whl", hash = "sha256:0e3c79274ba57b8a07757f9f40f2b625fe45eb1efce167de4dce405ff1833ea4", size = 1687658, upload-time = "2025-06-29T12:20:34.947Z" },
2421
+ { url = "https://files.pythonhosted.org/packages/5a/71/f824642aa730a45c6cd195c2df3d0d6376f170023f66aadc1b9f1f856ac7/paddlex-3.1.2-py3-none-any.whl", hash = "sha256:a87ede09cf6a9aebc09deadfaaedc6377505b408cac2ca4ec372c90e7399c71b", size = 1688520, upload-time = "2025-07-08T08:47:20.639Z" },
2417
2422
  ]
2418
2423
 
2419
2424
  [package.optional-dependencies]
@@ -2489,7 +2494,7 @@ wheels = [
2489
2494
 
2490
2495
  [[package]]
2491
2496
  name = "pandas"
2492
- version = "2.3.0"
2497
+ version = "2.3.1"
2493
2498
  source = { registry = "https://pypi.org/simple" }
2494
2499
  dependencies = [
2495
2500
  { name = "numpy" },
@@ -2497,21 +2502,21 @@ dependencies = [
2497
2502
  { name = "pytz" },
2498
2503
  { name = "tzdata" },
2499
2504
  ]
2500
- sdist = { url = "https://files.pythonhosted.org/packages/72/51/48f713c4c728d7c55ef7444ba5ea027c26998d96d1a40953b346438602fc/pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133", size = 4484490, upload-time = "2025-06-05T03:27:54.133Z" }
2501
- wheels = [
2502
- { url = "https://files.pythonhosted.org/packages/d3/57/5cb75a56a4842bbd0511c3d1c79186d8315b82dac802118322b2de1194fe/pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983", size = 11518913, upload-time = "2025-06-05T03:27:02.757Z" },
2503
- { url = "https://files.pythonhosted.org/packages/05/01/0c8785610e465e4948a01a059562176e4c8088aa257e2e074db868f86d4e/pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd", size = 10655249, upload-time = "2025-06-05T16:50:20.17Z" },
2504
- { url = "https://files.pythonhosted.org/packages/e8/6a/47fd7517cd8abe72a58706aab2b99e9438360d36dcdb052cf917b7bf3bdc/pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f", size = 11328359, upload-time = "2025-06-05T03:27:06.431Z" },
2505
- { url = "https://files.pythonhosted.org/packages/2a/b3/463bfe819ed60fb7e7ddffb4ae2ee04b887b3444feee6c19437b8f834837/pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3", size = 12024789, upload-time = "2025-06-05T03:27:09.875Z" },
2506
- { url = "https://files.pythonhosted.org/packages/04/0c/e0704ccdb0ac40aeb3434d1c641c43d05f75c92e67525df39575ace35468/pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8", size = 12480734, upload-time = "2025-06-06T00:00:22.246Z" },
2507
- { url = "https://files.pythonhosted.org/packages/e9/df/815d6583967001153bb27f5cf075653d69d51ad887ebbf4cfe1173a1ac58/pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9", size = 13223381, upload-time = "2025-06-05T03:27:15.641Z" },
2508
- { url = "https://files.pythonhosted.org/packages/79/88/ca5973ed07b7f484c493e941dbff990861ca55291ff7ac67c815ce347395/pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390", size = 10970135, upload-time = "2025-06-05T03:27:24.131Z" },
2509
- { url = "https://files.pythonhosted.org/packages/24/fb/0994c14d1f7909ce83f0b1fb27958135513c4f3f2528bde216180aa73bfc/pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575", size = 12141356, upload-time = "2025-06-05T03:27:34.547Z" },
2510
- { url = "https://files.pythonhosted.org/packages/9d/a2/9b903e5962134497ac4f8a96f862ee3081cb2506f69f8e4778ce3d9c9d82/pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042", size = 11474674, upload-time = "2025-06-05T03:27:39.448Z" },
2511
- { url = "https://files.pythonhosted.org/packages/81/3a/3806d041bce032f8de44380f866059437fb79e36d6b22c82c187e65f765b/pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c", size = 11439876, upload-time = "2025-06-05T03:27:43.652Z" },
2512
- { url = "https://files.pythonhosted.org/packages/15/aa/3fc3181d12b95da71f5c2537c3e3b3af6ab3a8c392ab41ebb766e0929bc6/pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67", size = 11966182, upload-time = "2025-06-05T03:27:47.652Z" },
2513
- { url = "https://files.pythonhosted.org/packages/37/e7/e12f2d9b0a2c4a2cc86e2aabff7ccfd24f03e597d770abfa2acd313ee46b/pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f", size = 12547686, upload-time = "2025-06-06T00:00:26.142Z" },
2514
- { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847, upload-time = "2025-06-05T03:27:51.465Z" },
2505
+ sdist = { url = "https://files.pythonhosted.org/packages/d1/6f/75aa71f8a14267117adeeed5d21b204770189c0a0025acbdc03c337b28fc/pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2", size = 4487493, upload-time = "2025-07-07T19:20:04.079Z" }
2506
+ wheels = [
2507
+ { url = "https://files.pythonhosted.org/packages/32/ed/ff0a67a2c5505e1854e6715586ac6693dd860fbf52ef9f81edee200266e7/pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22", size = 11531393, upload-time = "2025-07-07T19:19:12.245Z" },
2508
+ { url = "https://files.pythonhosted.org/packages/c7/db/d8f24a7cc9fb0972adab0cc80b6817e8bef888cfd0024eeb5a21c0bb5c4a/pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a", size = 10668750, upload-time = "2025-07-07T19:19:14.612Z" },
2509
+ { url = "https://files.pythonhosted.org/packages/0f/b0/80f6ec783313f1e2356b28b4fd8d2148c378370045da918c73145e6aab50/pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928", size = 11342004, upload-time = "2025-07-07T19:19:16.857Z" },
2510
+ { url = "https://files.pythonhosted.org/packages/e9/e2/20a317688435470872885e7fc8f95109ae9683dec7c50be29b56911515a5/pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9", size = 12050869, upload-time = "2025-07-07T19:19:19.265Z" },
2511
+ { url = "https://files.pythonhosted.org/packages/55/79/20d746b0a96c67203a5bee5fb4e00ac49c3e8009a39e1f78de264ecc5729/pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12", size = 12750218, upload-time = "2025-07-07T19:19:21.547Z" },
2512
+ { url = "https://files.pythonhosted.org/packages/7c/0f/145c8b41e48dbf03dd18fdd7f24f8ba95b8254a97a3379048378f33e7838/pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb", size = 13416763, upload-time = "2025-07-07T19:19:23.939Z" },
2513
+ { url = "https://files.pythonhosted.org/packages/b2/c0/54415af59db5cdd86a3d3bf79863e8cc3fa9ed265f0745254061ac09d5f2/pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956", size = 10987482, upload-time = "2025-07-07T19:19:42.699Z" },
2514
+ { url = "https://files.pythonhosted.org/packages/48/64/2fd2e400073a1230e13b8cd604c9bc95d9e3b962e5d44088ead2e8f0cfec/pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a", size = 12029159, upload-time = "2025-07-07T19:19:26.362Z" },
2515
+ { url = "https://files.pythonhosted.org/packages/d8/0a/d84fd79b0293b7ef88c760d7dca69828d867c89b6d9bc52d6a27e4d87316/pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9", size = 11393287, upload-time = "2025-07-07T19:19:29.157Z" },
2516
+ { url = "https://files.pythonhosted.org/packages/50/ae/ff885d2b6e88f3c7520bb74ba319268b42f05d7e583b5dded9837da2723f/pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275", size = 11309381, upload-time = "2025-07-07T19:19:31.436Z" },
2517
+ { url = "https://files.pythonhosted.org/packages/85/86/1fa345fc17caf5d7780d2699985c03dbe186c68fee00b526813939062bb0/pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab", size = 11883998, upload-time = "2025-07-07T19:19:34.267Z" },
2518
+ { url = "https://files.pythonhosted.org/packages/81/aa/e58541a49b5e6310d89474333e994ee57fea97c8aaa8fc7f00b873059bbf/pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96", size = 12704705, upload-time = "2025-07-07T19:19:36.856Z" },
2519
+ { url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044, upload-time = "2025-07-07T19:19:39.999Z" },
2515
2520
  ]
2516
2521
 
2517
2522
  [[package]]
File without changes
File without changes
File without changes