kreuzberg 3.6.2__tar.gz → 3.7.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/PKG-INFO +54 -2
  2. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/README.md +52 -1
  3. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/index.md +1 -0
  4. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/index.md +1 -0
  5. kreuzberg-3.7.0/docs/user-guide/mcp-server.md +571 -0
  6. kreuzberg-3.7.0/kreuzberg/_mcp/__init__.py +5 -0
  7. kreuzberg-3.7.0/kreuzberg/_mcp/server.py +227 -0
  8. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/mkdocs.yaml +1 -0
  9. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/pyproject.toml +3 -1
  10. kreuzberg-3.7.0/tests/mcp_server_test.py +374 -0
  11. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/uv.lock +170 -1
  12. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.commitlintrc +0 -0
  13. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.docker/Dockerfile +0 -0
  14. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.docker/README.md +0 -0
  15. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.dockerignore +0 -0
  16. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/dependabot.yaml +0 -0
  17. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/ci.yaml +0 -0
  18. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/docs.yml +0 -0
  19. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/pr-title.yaml +0 -0
  20. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/publish-docker.yml +0 -0
  21. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.github/workflows/release.yaml +0 -0
  22. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.gitignore +0 -0
  23. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.gitmodules +0 -0
  24. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.markdownlint.yaml +0 -0
  25. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/.pre-commit-config.yaml +0 -0
  26. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/LICENSE +0 -0
  27. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/ai-rulez.yaml +0 -0
  28. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/README.md +0 -0
  29. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/benchmark_baseline.py +0 -0
  30. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/end_to_end_benchmark.py +0 -0
  31. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/final_benchmark.py +0 -0
  32. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/pyproject.toml +0 -0
  33. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/baseline_results.json +0 -0
  34. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/benchmark_msgpack_20250702_003800.json +0 -0
  35. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/comprehensive_caching_results.json +0 -0
  36. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/final_benchmark_results.json +0 -0
  37. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/mime_caching_results.json +0 -0
  38. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/msgspec_caching_results.json +0 -0
  39. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/ocr_caching_results.json +0 -0
  40. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/serialization_benchmark_results.json +0 -0
  41. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/statistical_benchmark_results.json +0 -0
  42. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/results/table_caching_results.json +0 -0
  43. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/serialization_benchmark.py +0 -0
  44. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/__init__.py +0 -0
  45. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/__main__.py +0 -0
  46. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/benchmarks.py +0 -0
  47. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/cli.py +0 -0
  48. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/models.py +0 -0
  49. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/profiler.py +0 -0
  50. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/src/kreuzberg_benchmarks/runner.py +0 -0
  51. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/benchmarks/statistical_benchmark.py +0 -0
  52. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/custom-extractors.md +0 -0
  53. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/custom-hooks.md +0 -0
  54. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/error-handling.md +0 -0
  55. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/index.md +0 -0
  56. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/advanced/performance.md +0 -0
  57. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/exceptions.md +0 -0
  58. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/extraction-functions.md +0 -0
  59. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/extractor-registry.md +0 -0
  60. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/index.md +0 -0
  61. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/ocr-configuration.md +0 -0
  62. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/api-reference/types.md +0 -0
  63. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/assets/favicon.png +0 -0
  64. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/assets/logo.png +0 -0
  65. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/changelog.md +0 -0
  66. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/cli.md +0 -0
  67. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/contributing.md +0 -0
  68. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/css/extra.css +0 -0
  69. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/examples/extraction-examples.md +0 -0
  70. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/examples/index.md +0 -0
  71. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/index.md +0 -0
  72. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/installation.md +0 -0
  73. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/getting-started/quick-start.md +0 -0
  74. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/api-server.md +0 -0
  75. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/basic-usage.md +0 -0
  76. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/chunking.md +0 -0
  77. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/docker.md +0 -0
  78. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/extraction-configuration.md +0 -0
  79. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/metadata-extraction.md +0 -0
  80. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/ocr-backends.md +0 -0
  81. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/ocr-configuration.md +0 -0
  82. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/docs/user-guide/supported-formats.md +0 -0
  83. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/__init__.py +0 -0
  84. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/__main__.py +0 -0
  85. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_api/__init__.py +0 -0
  86. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_api/main.py +0 -0
  87. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_chunker.py +0 -0
  88. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_cli_config.py +0 -0
  89. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_constants.py +0 -0
  90. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_entity_extraction.py +0 -0
  91. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/__init__.py +0 -0
  92. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_base.py +0 -0
  93. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_html.py +0 -0
  94. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_image.py +0 -0
  95. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_pandoc.py +0 -0
  96. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_pdf.py +0 -0
  97. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_presentation.py +0 -0
  98. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  99. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_gmft.py +0 -0
  100. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_language_detection.py +0 -0
  101. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_mime_types.py +0 -0
  102. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/__init__.py +0 -0
  103. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/gmft_isolated.py +0 -0
  104. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/process_manager.py +0 -0
  105. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_easyocr.py +0 -0
  106. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_paddleocr.py +0 -0
  107. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/sync_tesseract.py +0 -0
  108. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_multiprocessing/tesseract_pool.py +0 -0
  109. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/__init__.py +0 -0
  110. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_base.py +0 -0
  111. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_easyocr.py +0 -0
  112. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_paddleocr.py +0 -0
  113. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_ocr/_tesseract.py +0 -0
  114. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_playa.py +0 -0
  115. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_registry.py +0 -0
  116. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_types.py +0 -0
  117. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/__init__.py +0 -0
  118. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_cache.py +0 -0
  119. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_device.py +0 -0
  120. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_document_cache.py +0 -0
  121. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_errors.py +0 -0
  122. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_pdf_lock.py +0 -0
  123. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_process_pool.py +0 -0
  124. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_serialization.py +0 -0
  125. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_string.py +0 -0
  126. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_sync.py +0 -0
  127. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/_utils/_tmp.py +0 -0
  128. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/cli.py +0 -0
  129. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/exceptions.py +0 -0
  130. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/extraction.py +0 -0
  131. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/kreuzberg/py.typed +0 -0
  132. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/__init__.py +0 -0
  133. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/api/__init__.py +0 -0
  134. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/api/main_test.py +0 -0
  135. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/chunker_test.py +0 -0
  136. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/cli_integration_test.py +0 -0
  137. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/cli_test.py +0 -0
  138. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/conftest.py +0 -0
  139. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/entity_extraction_test.py +0 -0
  140. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/exceptions_test.py +0 -0
  141. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extraction_batch_test.py +0 -0
  142. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extraction_test.py +0 -0
  143. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/__init__.py +0 -0
  144. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/html_test.py +0 -0
  145. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/image_test.py +0 -0
  146. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pandoc_metadata_test.py +0 -0
  147. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pandoc_test.py +0 -0
  148. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/pdf_test.py +0 -0
  149. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/presentation_test.py +0 -0
  150. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/extractors/spreed_sheet_test.py +0 -0
  151. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/gmft_extended_test.py +0 -0
  152. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/gmft_test.py +0 -0
  153. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/hooks_test.py +0 -0
  154. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/language_detection_test.py +0 -0
  155. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/mime_types_test.py +0 -0
  156. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/__init__.py +0 -0
  157. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/gmft_integration_test.py +0 -0
  158. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/process_manager_test.py +0 -0
  159. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/sync_tesseract_test.py +0 -0
  160. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/multiprocessing/tesseract_pool_test.py +0 -0
  161. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/__init__.py +0 -0
  162. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/base_test.py +0 -0
  163. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/device_integration_test.py +0 -0
  164. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/easyocr_test.py +0 -0
  165. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/init_test.py +0 -0
  166. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/paddleocr_test.py +0 -0
  167. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/ocr/tesseract_test.py +0 -0
  168. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/playa_test.py +0 -0
  169. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/registry_test.py +0 -0
  170. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/document.docx +0 -0
  171. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/excel-multi-sheet.xlsx +0 -0
  172. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/excel.xlsx +0 -0
  173. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/french-text.txt +0 -0
  174. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/german-text.txt +0 -0
  175. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/html.html +0 -0
  176. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/markdown.md +0 -0
  177. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/non-ascii-text.pdf +0 -0
  178. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/non-searchable.pdf +0 -0
  179. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/ocr-image.jpg +0 -0
  180. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/large.pdf +0 -0
  181. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/medium.pdf +0 -0
  182. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pdfs_with_tables/tiny.pdf +0 -0
  183. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/pitch-deck-presentation.pptx +0 -0
  184. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/sample-contract.pdf +0 -0
  185. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/scanned.pdf +0 -0
  186. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/searchable.pdf +0 -0
  187. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/spanish-text.txt +0 -0
  188. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/test_source_files/test-article.pdf +0 -0
  189. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/types_test.py +0 -0
  190. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/__init__.py +0 -0
  191. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/cache_test.py +0 -0
  192. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/device_test.py +0 -0
  193. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/errors_test.py +0 -0
  194. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/pdf_lock_test.py +0 -0
  195. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/process_pool_test.py +0 -0
  196. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/serialization_test.py +0 -0
  197. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/string_test.py +0 -0
  198. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/sync_test.py +0 -0
  199. {kreuzberg-3.6.2 → kreuzberg-3.7.0}/tests/utils/tmp_test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.6.2
3
+ Version: 3.7.0
4
4
  Summary: A text extraction library supporting PDFs, images, office documents and more
5
5
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
6
6
  Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
@@ -26,6 +26,7 @@ Requires-Dist: anyio>=4.9.0
26
26
  Requires-Dist: charset-normalizer>=3.4.2
27
27
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
28
28
  Requires-Dist: html-to-markdown[lxml]>=1.6.0
29
+ Requires-Dist: mcp>=1.11.0
29
30
  Requires-Dist: msgspec>=0.18.0
30
31
  Requires-Dist: playa-pdf>=0.6.1
31
32
  Requires-Dist: psutil>=7.0.0
@@ -90,7 +91,8 @@ Description-Content-Type: text/markdown
90
91
  - **🏠 Local Processing**: No cloud dependencies or external API calls
91
92
  - **📦 Rich Format Support**: PDFs, images, Office docs, HTML, and more
92
93
  - **🔍 Multiple OCR Engines**: Tesseract, EasyOCR, and PaddleOCR support
93
- - **🐳 Production Ready**: CLI, REST API, and Docker images included
94
+ - **🤖 AI Integration**: Native MCP server for Claude and other AI tools
95
+ - **🐳 Production Ready**: CLI, REST API, MCP server, and Docker images included
94
96
 
95
97
  ## Quick Start
96
98
 
@@ -136,6 +138,55 @@ asyncio.run(main())
136
138
 
137
139
  ## Deployment Options
138
140
 
141
+ ### 🤖 MCP Server (AI Integration)
142
+
143
+ **Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
144
+
145
+ ```bash
146
+ # Install and run MCP server with all features (recommended)
147
+ pip install "kreuzberg[all]"
148
+ kreuzberg-mcp
149
+
150
+ # Or with uvx (recommended for Claude Desktop)
151
+ uvx --with "kreuzberg[all]" kreuzberg-mcp
152
+
153
+ # Basic installation (core features only)
154
+ pip install kreuzberg
155
+ kreuzberg-mcp
156
+ ```
157
+
158
+ **Configure in Claude Desktop (`claude_desktop_config.json`):**
159
+
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "kreuzberg": {
164
+ "command": "uvx",
165
+ "args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
166
+ }
167
+ }
168
+ }
169
+ ```
170
+
171
+ **Basic configuration (core features only):**
172
+
173
+ ```json
174
+ {
175
+ "mcpServers": {
176
+ "kreuzberg": {
177
+ "command": "uvx",
178
+ "args": ["kreuzberg-mcp"]
179
+ }
180
+ }
181
+ }
182
+ ```
183
+
184
+ **Available MCP capabilities:**
185
+
186
+ - **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
187
+ - **Resources**: Configuration, supported formats, OCR backends
188
+ - **Prompts**: Extract-and-summarize, structured analysis workflows
189
+
139
190
  ### 🐳 Docker (Recommended)
140
191
 
141
192
  ```bash
@@ -221,6 +272,7 @@ kreuzberg extract *.pdf --output-dir ./extracted/
221
272
 
222
273
  ## Advanced Features
223
274
 
275
+ - **🤖 MCP Server**: Native integration with Claude Desktop and AI tools
224
276
  - **📊 Table Extraction**: Extract tables from PDFs with GMFT
225
277
  - **🧩 Content Chunking**: Split documents for RAG applications
226
278
  - **🎯 Custom Extractors**: Extend with your own document handlers
@@ -18,7 +18,8 @@
18
18
  - **🏠 Local Processing**: No cloud dependencies or external API calls
19
19
  - **📦 Rich Format Support**: PDFs, images, Office docs, HTML, and more
20
20
  - **🔍 Multiple OCR Engines**: Tesseract, EasyOCR, and PaddleOCR support
21
- - **🐳 Production Ready**: CLI, REST API, and Docker images included
21
+ - **🤖 AI Integration**: Native MCP server for Claude and other AI tools
22
+ - **🐳 Production Ready**: CLI, REST API, MCP server, and Docker images included
22
23
 
23
24
  ## Quick Start
24
25
 
@@ -64,6 +65,55 @@ asyncio.run(main())
64
65
 
65
66
  ## Deployment Options
66
67
 
68
+ ### 🤖 MCP Server (AI Integration)
69
+
70
+ **Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
71
+
72
+ ```bash
73
+ # Install and run MCP server with all features (recommended)
74
+ pip install "kreuzberg[all]"
75
+ kreuzberg-mcp
76
+
77
+ # Or with uvx (recommended for Claude Desktop)
78
+ uvx --with "kreuzberg[all]" kreuzberg-mcp
79
+
80
+ # Basic installation (core features only)
81
+ pip install kreuzberg
82
+ kreuzberg-mcp
83
+ ```
84
+
85
+ **Configure in Claude Desktop (`claude_desktop_config.json`):**
86
+
87
+ ```json
88
+ {
89
+ "mcpServers": {
90
+ "kreuzberg": {
91
+ "command": "uvx",
92
+ "args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
93
+ }
94
+ }
95
+ }
96
+ ```
97
+
98
+ **Basic configuration (core features only):**
99
+
100
+ ```json
101
+ {
102
+ "mcpServers": {
103
+ "kreuzberg": {
104
+ "command": "uvx",
105
+ "args": ["kreuzberg-mcp"]
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
+ **Available MCP capabilities:**
112
+
113
+ - **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
114
+ - **Resources**: Configuration, supported formats, OCR backends
115
+ - **Prompts**: Extract-and-summarize, structured analysis workflows
116
+
67
117
  ### 🐳 Docker (Recommended)
68
118
 
69
119
  ```bash
@@ -149,6 +199,7 @@ kreuzberg extract *.pdf --output-dir ./extracted/
149
199
 
150
200
  ## Advanced Features
151
201
 
202
+ - **🤖 MCP Server**: Native integration with Claude Desktop and AI tools
152
203
  - **📊 Table Extraction**: Extract tables from PDFs with GMFT
153
204
  - **🧩 Content Chunking**: Split documents for RAG applications
154
205
  - **🎯 Custom Extractors**: Extend with your own document handlers
@@ -6,6 +6,7 @@ Kreuzberg is a Python library for text extraction from documents. It provides a
6
6
 
7
7
  - **Simple and Hassle-Free**: Clean API that just works, without complex configuration
8
8
  - **Local Processing**: No external API calls or cloud dependencies required
9
+ - **AI Integration**: Native MCP server for Claude Desktop and other AI tools
9
10
  - **Resource Efficient**: Lightweight processing without GPU requirements
10
11
  - **Small Package Size**: Has few curated dependencies and a minimal footprint
11
12
  - **Format Support**: Comprehensive support for documents, images, and text formats
@@ -11,6 +11,7 @@ This guide covers the main concepts and usage patterns of Kreuzberg.
11
11
  - [OCR Configuration](ocr-configuration.md) - Configure OCR settings ([API](../api-reference/ocr-configuration.md))
12
12
  - [OCR Backends](ocr-backends.md) - Choose and configure different OCR engines
13
13
  - [Supported Formats](supported-formats.md) - All supported document formats
14
+ - [MCP Server](mcp-server.md) - Model Context Protocol server for AI integration
14
15
  - [API Server](api-server.md) - REST API for document extraction
15
16
  - [Docker](docker.md) - Using Kreuzberg with Docker
16
17