kreuzberg 3.9.0__tar.gz → 3.9.1__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.pre-commit-config.yaml +1 -1
  2. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/PKG-INFO +11 -11
  3. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/README.md +4 -4
  4. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/pyproject.toml +14 -8
  5. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/uv.lock +790 -619
  6. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.commitlintrc +0 -0
  7. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.deepsource.toml +0 -0
  8. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.docker/Dockerfile +0 -0
  9. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.docker/README.md +0 -0
  10. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.dockerignore +0 -0
  11. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/dependabot.yaml +0 -0
  12. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/workflows/ci.yaml +0 -0
  13. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/workflows/docs.yml +0 -0
  14. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/workflows/pr-title.yaml +0 -0
  15. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/workflows/publish-docker.yml +0 -0
  16. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.github/workflows/release.yaml +0 -0
  17. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.gitignore +0 -0
  18. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/.markdownlint.yaml +0 -0
  19. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/LICENSE +0 -0
  20. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/ai-rulez.yaml +0 -0
  21. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/README.md +0 -0
  22. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/benchmark_baseline.py +0 -0
  23. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/end_to_end_benchmark.py +0 -0
  24. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/final_benchmark.py +0 -0
  25. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/pyproject.toml +0 -0
  26. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/baseline_results.json +0 -0
  27. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
  28. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/comprehensive_caching_results.json +0 -0
  29. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/final_benchmark_results.json +0 -0
  30. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/latest.json +0 -0
  31. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/mime_caching_results.json +0 -0
  32. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/msgspec_caching_results.json +0 -0
  33. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/ocr_caching_results.json +0 -0
  34. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/serialization_benchmark_results.json +0 -0
  35. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/statistical_benchmark_results.json +0 -0
  36. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/results/table_caching_results.json +0 -0
  37. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/serialization_benchmark.py +0 -0
  38. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
  39. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
  40. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
  41. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
  42. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
  43. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
  44. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
  45. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/benchmarks/statistical_benchmark.py +0 -0
  46. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/advanced/custom-extractors.md +0 -0
  47. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/advanced/custom-hooks.md +0 -0
  48. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/advanced/error-handling.md +0 -0
  49. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/advanced/index.md +0 -0
  50. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/advanced/performance.md +0 -0
  51. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/exceptions.md +0 -0
  52. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/extraction-functions.md +0 -0
  53. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/extractor-registry.md +0 -0
  54. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/index.md +0 -0
  55. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/ocr-configuration.md +0 -0
  56. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/api-reference/types.md +0 -0
  57. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/assets/favicon.png +0 -0
  58. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/assets/logo.png +0 -0
  59. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/changelog.md +0 -0
  60. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/cli.md +0 -0
  61. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/contributing.md +0 -0
  62. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/css/extra.css +0 -0
  63. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/examples/extraction-examples.md +0 -0
  64. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/examples/index.md +0 -0
  65. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/getting-started/index.md +0 -0
  66. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/getting-started/installation.md +0 -0
  67. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/getting-started/quick-start.md +0 -0
  68. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/index.md +0 -0
  69. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/performance-analysis.md +0 -0
  70. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/api-server.md +0 -0
  71. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/basic-usage.md +0 -0
  72. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/chunking.md +0 -0
  73. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/docker.md +0 -0
  74. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/document-classification.md +0 -0
  75. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/extraction-configuration.md +0 -0
  76. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/index.md +0 -0
  77. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/mcp-server.md +0 -0
  78. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/metadata-extraction.md +0 -0
  79. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/ocr-backends.md +0 -0
  80. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/ocr-configuration.md +0 -0
  81. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/docs/user-guide/supported-formats.md +0 -0
  82. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/__init__.py +0 -0
  83. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/__main__.py +0 -0
  84. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_api/__init__.py +0 -0
  85. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_api/main.py +0 -0
  86. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_chunker.py +0 -0
  87. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_config.py +0 -0
  88. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_constants.py +0 -0
  89. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_document_classification.py +0 -0
  90. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_entity_extraction.py +0 -0
  91. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/__init__.py +0 -0
  92. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_base.py +0 -0
  93. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_email.py +0 -0
  94. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_html.py +0 -0
  95. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_image.py +0 -0
  96. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_pandoc.py +0 -0
  97. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_pdf.py +0 -0
  98. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_presentation.py +0 -0
  99. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  100. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_extractors/_structured.py +0 -0
  101. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_gmft.py +0 -0
  102. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_language_detection.py +0 -0
  103. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_mcp/__init__.py +0 -0
  104. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_mcp/server.py +0 -0
  105. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_mime_types.py +0 -0
  106. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_ocr/__init__.py +0 -0
  107. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_ocr/_base.py +0 -0
  108. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_ocr/_easyocr.py +0 -0
  109. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_ocr/_paddleocr.py +0 -0
  110. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_ocr/_tesseract.py +0 -0
  111. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_playa.py +0 -0
  112. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_registry.py +0 -0
  113. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_types.py +0 -0
  114. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/__init__.py +0 -0
  115. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_cache.py +0 -0
  116. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_device.py +0 -0
  117. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_document_cache.py +0 -0
  118. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_errors.py +0 -0
  119. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_pdf_lock.py +0 -0
  120. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_process_pool.py +0 -0
  121. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_quality.py +0 -0
  122. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_serialization.py +0 -0
  123. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_string.py +0 -0
  124. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_sync.py +0 -0
  125. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_table.py +0 -0
  126. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/_utils/_tmp.py +0 -0
  127. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/cli.py +0 -0
  128. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/exceptions.py +0 -0
  129. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/extraction.py +0 -0
  130. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/kreuzberg/py.typed +0 -0
  131. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/mkdocs.yaml +0 -0
  132. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/__init__.py +0 -0
  133. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/api/__init__.py +0 -0
  134. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/api/main_test.py +0 -0
  135. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/chunker_test.py +0 -0
  136. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/cli_integration_test.py +0 -0
  137. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/cli_test.py +0 -0
  138. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/config_test.py +0 -0
  139. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/conftest.py +0 -0
  140. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/document_classification_test.py +0 -0
  141. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/entity_extraction_test.py +0 -0
  142. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/exceptions_test.py +0 -0
  143. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extraction_batch_test.py +0 -0
  144. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extraction_test.py +0 -0
  145. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/__init__.py +0 -0
  146. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/email_comprehensive_test.py +0 -0
  147. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/email_test.py +0 -0
  148. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/html_test.py +0 -0
  149. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/image_test.py +0 -0
  150. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/pandoc_metadata_test.py +0 -0
  151. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/pandoc_test.py +0 -0
  152. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/pdf_test.py +0 -0
  153. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/presentation_test.py +0 -0
  154. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/spreed_sheet_test.py +0 -0
  155. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/extractors/structured_test.py +0 -0
  156. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/gmft_extended_test.py +0 -0
  157. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/gmft_test.py +0 -0
  158. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/hooks_test.py +0 -0
  159. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/language_detection_test.py +0 -0
  160. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/mcp_server_test.py +0 -0
  161. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/mime_types_test.py +0 -0
  162. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/multiprocessing/__init__.py +0 -0
  163. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/multiprocessing/gmft_integration_test.py +0 -0
  164. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/multiprocessing/gmft_isolated_test.py +0 -0
  165. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/multiprocessing/process_manager_test.py +0 -0
  166. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  167. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/__init__.py +0 -0
  168. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/base_test.py +0 -0
  169. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/device_integration_test.py +0 -0
  170. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/easyocr_test.py +0 -0
  171. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/init_test.py +0 -0
  172. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/paddleocr_test.py +0 -0
  173. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/ocr/tesseract_test.py +0 -0
  174. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/playa_test.py +0 -0
  175. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/registry_test.py +0 -0
  176. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/better-ocr-image.jpg +0 -0
  177. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/contract.txt +0 -0
  178. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/contract_test.txt +0 -0
  179. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/document.docx +0 -0
  180. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/email/sample-email.eml +0 -0
  181. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  182. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/excel.xlsx +0 -0
  183. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/form_test.txt +0 -0
  184. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/french-text.txt +0 -0
  185. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/german-text.txt +0 -0
  186. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/html.html +0 -0
  187. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/images/test_hello_world.png +0 -0
  188. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/invoice_image.png +0 -0
  189. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/invoice_test.txt +0 -0
  190. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/json/sample-document.json +0 -0
  191. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/layout-parser-ocr.jpg +0 -0
  192. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/markdown.md +0 -0
  193. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/non-ascii-text.pdf +0 -0
  194. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/non-searchable.pdf +0 -0
  195. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/ocr-image.jpg +0 -0
  196. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  197. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  198. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  199. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  200. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/receipt_test.txt +0 -0
  201. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/report_test.txt +0 -0
  202. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/sample-contract.pdf +0 -0
  203. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/scanned.pdf +0 -0
  204. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/searchable.pdf +0 -0
  205. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/spanish-text.txt +0 -0
  206. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/test-article.pdf +0 -0
  207. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/test_source_files/yaml/sample-config.yaml +0 -0
  208. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/types_test.py +0 -0
  209. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/__init__.py +0 -0
  210. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/cache_test.py +0 -0
  211. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/device_test.py +0 -0
  212. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/errors_test.py +0 -0
  213. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/pdf_lock_test.py +0 -0
  214. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/process_pool_test.py +0 -0
  215. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/serialization_test.py +0 -0
  216. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/string_test.py +0 -0
  217. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/sync_test.py +0 -0
  218. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/table_test.py +0 -0
  219. {kreuzberg-3.9.0 → kreuzberg-3.9.1}/tests/utils/tmp_test.py +0 -0
@@ -53,7 +53,7 @@ repos:
53
53
  hooks:
54
54
  - id: pyproject-fmt
55
55
  - repo: https://github.com/astral-sh/ruff-pre-commit
56
- rev: v0.12.3
56
+ rev: v0.12.5
57
57
  hooks:
58
58
  - id: ruff
59
59
  args: ["--fix", "--unsafe-fixes"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.9.0
3
+ Version: 3.9.1
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -29,12 +29,12 @@ Classifier: Topic :: Text Processing :: General
29
29
  Classifier: Typing :: Typed
30
30
  Requires-Python: >=3.10
31
31
  Requires-Dist: anyio>=4.9.0
32
- Requires-Dist: chardetng-py>=0.3.4
32
+ Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
- Requires-Dist: html-to-markdown[lxml]>=1.8.0
35
- Requires-Dist: mcp>=1.11.0
34
+ Requires-Dist: html-to-markdown[lxml]>=1.9.0
35
+ Requires-Dist: mcp>=1.12.2
36
36
  Requires-Dist: msgspec>=0.18.0
37
- Requires-Dist: playa-pdf>=0.6.1
37
+ Requires-Dist: playa-pdf>=0.6.4
38
38
  Requires-Dist: psutil>=7.0.0
39
39
  Requires-Dist: pypdfium2==4.30.0
40
40
  Requires-Dist: python-calamine>=0.3.2
@@ -53,7 +53,7 @@ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all
53
53
  Requires-Dist: mailparse>=1.0.15; extra == 'all'
54
54
  Requires-Dist: paddleocr>=3.1.0; extra == 'all'
55
55
  Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
56
- Requires-Dist: rich>=14.0.0; extra == 'all'
56
+ Requires-Dist: rich>=14.1.0; extra == 'all'
57
57
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
58
58
  Requires-Dist: setuptools>=80.9.0; extra == 'all'
59
59
  Requires-Dist: spacy>=3.8.7; extra == 'all'
@@ -67,7 +67,7 @@ Provides-Extra: chunking
67
67
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
68
68
  Provides-Extra: cli
69
69
  Requires-Dist: click>=8.2.1; extra == 'cli'
70
- Requires-Dist: rich>=14.0.0; extra == 'cli'
70
+ Requires-Dist: rich>=14.1.0; extra == 'cli'
71
71
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
72
72
  Provides-Extra: easyocr
73
73
  Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
@@ -130,14 +130,14 @@ Kreuzberg leverages established open source technologies:
130
130
  ### Extract Text with CLI
131
131
 
132
132
  ```bash
133
- # Extract text from any file to markdown
134
- uvx kreuzberg extract document.pdf > output.md
133
+ # Extract text from any file to text format
134
+ uvx kreuzberg extract document.pdf > output.txt
135
135
 
136
136
  # With all features (OCR, table extraction, etc.)
137
- uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr --format markdown
137
+ uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
138
138
 
139
139
  # Extract with rich metadata
140
- uvx kreuzberg extract report.pdf --show-metadata --format json
140
+ uvx kreuzberg extract report.pdf --show-metadata --output-format json
141
141
  ```
142
142
 
143
143
  ### Python Usage
@@ -44,14 +44,14 @@ Kreuzberg leverages established open source technologies:
44
44
  ### Extract Text with CLI
45
45
 
46
46
  ```bash
47
- # Extract text from any file to markdown
48
- uvx kreuzberg extract document.pdf > output.md
47
+ # Extract text from any file to text format
48
+ uvx kreuzberg extract document.pdf > output.txt
49
49
 
50
50
  # With all features (OCR, table extraction, etc.)
51
- uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr --format markdown
51
+ uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
52
52
 
53
53
  # Extract with rich metadata
54
- uvx kreuzberg extract report.pdf --show-metadata --format json
54
+ uvx kreuzberg extract report.pdf --show-metadata --output-format json
55
55
  ```
56
56
 
57
57
  ### Python Usage
@@ -5,7 +5,7 @@ requires = [ "hatchling" ]
5
5
 
6
6
  [project]
7
7
  name = "kreuzberg"
8
- version = "3.9.0"
8
+ version = "3.9.1"
9
9
  description = "Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -58,12 +58,12 @@ classifiers = [
58
58
 
59
59
  dependencies = [
60
60
  "anyio>=4.9.0",
61
- "chardetng-py>=0.3.4",
61
+ "chardetng-py>=0.3.5",
62
62
  "exceptiongroup>=1.2.2; python_version<'3.11'",
63
- "html-to-markdown[lxml]>=1.8.0",
64
- "mcp>=1.11.0",
63
+ "html-to-markdown[lxml]>=1.9.0",
64
+ "mcp>=1.12.2",
65
65
  "msgspec>=0.18.0",
66
- "playa-pdf>=0.6.1", # pinned due to breaking changes in 0.5.0
66
+ "playa-pdf>=0.6.4", # pinned due to breaking changes in 0.5.0
67
67
  "psutil>=7.0.0",
68
68
  "pypdfium2==4.30.0", # pinned due to bug in 4.30.1, until v5 is stable
69
69
  "python-calamine>=0.3.2",
@@ -88,7 +88,7 @@ optional-dependencies.auto-classify-document-type = [
88
88
  optional-dependencies.chunking = [ "semantic-text-splitter>=0.27.0" ]
89
89
  optional-dependencies.cli = [
90
90
  "click>=8.2.1",
91
- "rich>=14.0.0",
91
+ "rich>=14.1.0",
92
92
  "tomli>=2.0.0; python_version<'3.11'",
93
93
  ]
94
94
  optional-dependencies.easyocr = [ "easyocr>=1.7.2" ]
@@ -115,7 +115,7 @@ dev = [
115
115
  "pytest-cov>=6.2.1",
116
116
  "pytest-mock>=3.14.0",
117
117
  "pytest-timeout>=2.4.0",
118
- "ruff>=0.12.1",
118
+ "ruff>=0.12.5",
119
119
  "trio>=0.30.0",
120
120
  "uv-bump",
121
121
  ]
@@ -216,7 +216,13 @@ max_supported_python = "3.13"
216
216
  [tool.pytest.ini_options]
217
217
  timeout = 300
218
218
  testpaths = [ "tests" ]
219
- norecursedirs = [ "benchmarks", "python-text-extraction-libs-benchmarks", "dist", "build", "*.egg" ]
219
+ norecursedirs = [
220
+ "benchmarks",
221
+ "python-text-extraction-libs-benchmarks",
222
+ "dist",
223
+ "build",
224
+ "*.egg",
225
+ ]
220
226
  filterwarnings = [
221
227
  "ignore:Exception ignored in:pytest.PytestUnraisableExceptionWarning",
222
228
  "ignore:pkg_resources is deprecated as an API:DeprecationWarning",