kreuzberg 3.3.0__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. kreuzberg/__init__.py +9 -2
  2. kreuzberg/_api/__init__.py +0 -0
  3. kreuzberg/_api/main.py +87 -0
  4. kreuzberg/_entity_extraction.py +238 -0
  5. kreuzberg/_extractors/_base.py +39 -1
  6. kreuzberg/_extractors/_email.py +149 -0
  7. kreuzberg/_extractors/_html.py +15 -3
  8. kreuzberg/_extractors/_image.py +27 -22
  9. kreuzberg/_extractors/_pandoc.py +3 -14
  10. kreuzberg/_extractors/_pdf.py +97 -34
  11. kreuzberg/_extractors/_presentation.py +62 -10
  12. kreuzberg/_extractors/_spread_sheet.py +181 -6
  13. kreuzberg/_extractors/_structured.py +148 -0
  14. kreuzberg/_gmft.py +318 -11
  15. kreuzberg/_language_detection.py +95 -0
  16. kreuzberg/_mcp/__init__.py +5 -0
  17. kreuzberg/_mcp/server.py +227 -0
  18. kreuzberg/_mime_types.py +27 -1
  19. kreuzberg/_ocr/__init__.py +10 -1
  20. kreuzberg/_ocr/_base.py +59 -0
  21. kreuzberg/_ocr/_easyocr.py +92 -1
  22. kreuzberg/_ocr/_paddleocr.py +89 -0
  23. kreuzberg/_ocr/_tesseract.py +569 -5
  24. kreuzberg/_registry.py +4 -0
  25. kreuzberg/_types.py +181 -4
  26. kreuzberg/_utils/_cache.py +52 -4
  27. kreuzberg/_utils/_device.py +2 -2
  28. kreuzberg/_utils/_errors.py +3 -7
  29. kreuzberg/_utils/_process_pool.py +182 -9
  30. kreuzberg/_utils/_quality.py +237 -0
  31. kreuzberg/_utils/_serialization.py +4 -2
  32. kreuzberg/_utils/_string.py +153 -10
  33. kreuzberg/_utils/_sync.py +6 -7
  34. kreuzberg/_utils/_table.py +261 -0
  35. kreuzberg/_utils/_tmp.py +2 -2
  36. kreuzberg/cli.py +1 -2
  37. kreuzberg/extraction.py +43 -34
  38. kreuzberg-3.8.1.dist-info/METADATA +301 -0
  39. kreuzberg-3.8.1.dist-info/RECORD +53 -0
  40. {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/entry_points.txt +1 -0
  41. kreuzberg/_multiprocessing/__init__.py +0 -6
  42. kreuzberg/_multiprocessing/gmft_isolated.py +0 -332
  43. kreuzberg/_multiprocessing/process_manager.py +0 -188
  44. kreuzberg/_multiprocessing/sync_tesseract.py +0 -261
  45. kreuzberg/_multiprocessing/tesseract_pool.py +0 -359
  46. kreuzberg-3.3.0.dist-info/METADATA +0 -235
  47. kreuzberg-3.3.0.dist-info/RECORD +0 -48
  48. {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/WHEEL +0 -0
  49. {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,301 @@
1
+ Metadata-Version: 2.4
2
+ Name: kreuzberg
3
+ Version: 3.8.1
4
+ Summary: Advanced document intelligence framework for extracting structured content from PDFs, images, and office documents
5
+ Project-URL: homepage, https://github.com/Goldziher/kreuzberg
6
+ Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: automation,content-extraction,data-processing,document-analysis,document-intelligence,document-processing,entity-extraction,image-to-text,information-extraction,ocr,pdf-extraction,rag,structured-data,table-extraction,text-extraction
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Information Technology
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Database
22
+ Classifier: Topic :: Multimedia :: Graphics :: Capture :: Scanners
23
+ Classifier: Topic :: Office/Business :: Office Suites
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Classifier: Topic :: Text Processing :: General
28
+ Classifier: Typing :: Typed
29
+ Requires-Python: >=3.10
30
+ Requires-Dist: anyio>=4.9.0
31
+ Requires-Dist: chardetng-py>=0.3.4
32
+ Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
33
+ Requires-Dist: html-to-markdown[lxml]>=1.8.0
34
+ Requires-Dist: mcp>=1.11.0
35
+ Requires-Dist: msgspec>=0.18.0
36
+ Requires-Dist: playa-pdf>=0.6.1
37
+ Requires-Dist: psutil>=7.0.0
38
+ Requires-Dist: pypdfium2==4.30.0
39
+ Requires-Dist: python-calamine>=0.3.2
40
+ Requires-Dist: python-pptx>=1.0.2
41
+ Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
42
+ Provides-Extra: additional-extensions
43
+ Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
44
+ Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
45
+ Provides-Extra: all
46
+ Requires-Dist: click>=8.2.1; extra == 'all'
47
+ Requires-Dist: easyocr>=1.7.2; extra == 'all'
48
+ Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
49
+ Requires-Dist: gmft>=0.4.2; extra == 'all'
50
+ Requires-Dist: keybert>=0.9.0; extra == 'all'
51
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all'
52
+ Requires-Dist: mailparse>=1.0.15; extra == 'all'
53
+ Requires-Dist: paddleocr>=3.1.0; extra == 'all'
54
+ Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
55
+ Requires-Dist: rich>=14.0.0; extra == 'all'
56
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
57
+ Requires-Dist: setuptools>=80.9.0; extra == 'all'
58
+ Requires-Dist: spacy>=3.8.7; extra == 'all'
59
+ Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
60
+ Provides-Extra: api
61
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'api'
62
+ Provides-Extra: chunking
63
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
64
+ Provides-Extra: cli
65
+ Requires-Dist: click>=8.2.1; extra == 'cli'
66
+ Requires-Dist: rich>=14.0.0; extra == 'cli'
67
+ Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
68
+ Provides-Extra: easyocr
69
+ Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
70
+ Provides-Extra: entity-extraction
71
+ Requires-Dist: keybert>=0.9.0; extra == 'entity-extraction'
72
+ Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
73
+ Provides-Extra: gmft
74
+ Requires-Dist: gmft>=0.4.2; extra == 'gmft'
75
+ Provides-Extra: langdetect
76
+ Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
77
+ Provides-Extra: paddleocr
78
+ Requires-Dist: paddleocr>=3.1.0; extra == 'paddleocr'
79
+ Requires-Dist: paddlepaddle>=3.1.0; extra == 'paddleocr'
80
+ Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
81
+ Description-Content-Type: text/markdown
82
+
83
+ # Kreuzberg
84
+
85
+ [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
86
+ [![PyPI version](https://badge.fury.io/py/kreuzberg.svg)](https://badge.fury.io/py/kreuzberg)
87
+ [![Documentation](https://img.shields.io/badge/docs-GitHub_Pages-blue)](https://goldziher.github.io/kreuzberg/)
88
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
89
+ [![Test Coverage](https://img.shields.io/badge/coverage-95%25-green)](https://github.com/Goldziher/kreuzberg)
90
+
91
+ **Advanced Document Intelligence for Modern Python Applications.** Transform PDFs, images, and office documents into structured data with production-grade performance. Built by engineers who understand that speed, reliability, and developer experience matter.
92
+
93
+ 📖 **[Complete Documentation](https://goldziher.github.io/kreuzberg/)**
94
+
95
+ ## Why Choose Kreuzberg?
96
+
97
+ ### ⚡ Proven Performance
98
+
99
+ [Benchmarked](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) 6-126x faster than alternatives while using minimal resources. Process up to 14 files per second with 87MB install size and ~360MB memory usage. Optimized for production workloads and resource-constrained environments.
100
+
101
+ ### 🏗️ Production Engineering
102
+
103
+ Comprehensive test coverage (95%+), robust error handling, and true async/await support. Built with modern Python practices for reliability in production environments.
104
+
105
+ ### 🔧 Developer Experience
106
+
107
+ Works immediately with smart defaults, scales as you grow. Native MCP integration for AI tools, full type safety, and clear documentation.
108
+
109
+ ### 🚀 Flexible Deployment
110
+
111
+ Deploy on serverless platforms, containers, or traditional servers. Supports both CPU and GPU processing (via PaddleOCR and EasyOCR). No external API dependencies. Multiple deployment modes: CLI, REST API, MCP server.
112
+
113
+ ### 📄 Comprehensive Format Support
114
+
115
+ Extract from PDFs, images, Office documents, HTML, spreadsheets, and presentations. Multiple OCR engines with intelligent fallbacks, table extraction, and content preparation for RAG workflows.
116
+
117
+ ## Quick Start
118
+
119
+ ### Installation
120
+
121
+ ```bash
122
+ # Basic installation
123
+ pip install kreuzberg
124
+
125
+ # With optional features
126
+ pip install "kreuzberg[cli,api]" # CLI + REST API
127
+ pip install "kreuzberg[easyocr,gmft]" # EasyOCR + table extraction
128
+ pip install "kreuzberg[all]" # Everything
129
+ ```
130
+
131
+ ### System Dependencies
132
+
133
+ ```bash
134
+ # Ubuntu/Debian
135
+ sudo apt-get install tesseract-ocr pandoc
136
+
137
+ # macOS
138
+ brew install tesseract pandoc
139
+
140
+ # Windows
141
+ choco install tesseract pandoc
142
+ ```
143
+
144
+ ### Basic Usage
145
+
146
+ ```python
147
+ import asyncio
148
+ from kreuzberg import extract_file
149
+
150
+ async def main():
151
+     # Extract content from files
152
+     result = await extract_file("document.pdf")
153
+     print(result.content)
154
+     print(result.metadata)
155
+
156
+ asyncio.run(main())
157
+ ```
158
+
159
+ ## Deployment Options
160
+
161
+ ### 🤖 MCP Server (AI Integration)
162
+
163
+ **Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
164
+
165
+ ```bash
166
+ # Install and run MCP server with all features (recommended)
167
+ pip install "kreuzberg[all]"
168
+ kreuzberg-mcp
169
+
170
+ # Or with uvx (recommended for Claude Desktop)
171
+ uvx --with "kreuzberg[all]" kreuzberg-mcp
172
+
173
+ # Basic installation (core features only)
174
+ pip install kreuzberg
175
+ kreuzberg-mcp
176
+ ```
177
+
178
+ **Configure in Claude Desktop (`claude_desktop_config.json`):**
179
+
180
+ ```json
181
+ {
182
+ "mcpServers": {
183
+ "kreuzberg": {
184
+ "command": "uvx",
185
+ "args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
186
+ }
187
+ }
188
+ }
189
+ ```
190
+
191
+ **Basic configuration (core features only):**
192
+
193
+ ```json
194
+ {
195
+ "mcpServers": {
196
+ "kreuzberg": {
197
+ "command": "uvx",
198
+ "args": ["kreuzberg-mcp"]
199
+ }
200
+ }
201
+ }
202
+ ```
203
+
204
+ **Available MCP capabilities:**
205
+
206
+ - **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
207
+ - **Resources**: Configuration, supported formats, OCR backends
208
+ - **Prompts**: Extract-and-summarize, structured analysis workflows
209
+
210
+ ### 🐳 Docker (Recommended)
211
+
212
+ ```bash
213
+ # Run API server
214
+ docker run -p 8000:8000 goldziher/kreuzberg:latest
215
+
216
+ # Extract files
217
+ curl -X POST http://localhost:8000/extract -F "data=@document.pdf"
218
+ ```
219
+
220
+ Available variants: `latest`, `v3.8.0`, `v3.8.0-easyocr`, `v3.8.0-paddle`, `v3.8.0-gmft`, `v3.8.0-all`
221
+
222
+ ### 🌐 REST API
223
+
224
+ ```bash
225
+ # Install and run
226
+ pip install "kreuzberg[api]"
227
+ litestar --app kreuzberg._api.main:app run
228
+
229
+ # Health check
230
+ curl http://localhost:8000/health
231
+
232
+ # Extract files
233
+ curl -X POST http://localhost:8000/extract -F "data=@file.pdf"
234
+ ```
235
+
236
+ ### 💻 Command Line
237
+
238
+ ```bash
239
+ # Install CLI
240
+ pip install "kreuzberg[cli]"
241
+
242
+ # Extract to stdout
243
+ kreuzberg extract document.pdf
244
+
245
+ # JSON output with metadata
246
+ kreuzberg extract document.pdf --output-format json --show-metadata
247
+
248
+ # Batch processing
249
+ kreuzberg extract *.pdf --output-dir ./extracted/
250
+ ```
251
+
252
+ ## Supported Formats
253
+
254
+ | Category | Formats |
255
+ | ----------------- | ------------------------------ |
256
+ | **Documents** | PDF, DOCX, DOC, RTF, TXT, EPUB |
257
+ | **Images** | JPG, PNG, TIFF, BMP, GIF, WEBP |
258
+ | **Spreadsheets** | XLSX, XLS, CSV, ODS |
259
+ | **Presentations** | PPTX, PPT, ODP |
260
+ | **Web** | HTML, XML, MHTML |
261
+ | **Archives** | Support via extraction |
262
+
263
+ ## 📊 Performance Comparison
264
+
265
+ [Comprehensive benchmarks](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) across ~100 real-world documents • [View source](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) • [**Detailed Analysis**](https://goldziher.github.io/kreuzberg/performance-analysis/):
266
+
267
+ | Framework | Speed | Memory | Install Size | Dependencies | Success Rate |
268
+ | ------------- | ------------ | ------ | ------------ | ------------ | ------------ |
269
+ | **Kreuzberg** | 14.4 files/s | 360MB | 87MB | 43 | 100% |
270
+ | Unstructured | ~12 files/s | ~1GB | 146MB | 54 | 88%+ |
271
+ | MarkItDown | ~15 files/s | ~1.5GB | 251MB | 25 | 80%\* |
272
+ | Docling | ~1 file/min | ~5GB | 1,032MB | 88 | 45%\* |
273
+
274
+ \*_Performance varies significantly with document complexity and size_
275
+
276
+ **Key strengths:**
277
+
278
+ - 6-126x faster processing than comparable frameworks
279
+ - Smallest installation footprint and memory usage
280
+ - Only framework with built-in async/await support
281
+ - Supports both CPU and GPU processing
282
+ - Built by software engineers for production reliability
283
+
284
+ > **Benchmark details**: Tests include PDFs, Word docs, HTML, images, and spreadsheets in multiple languages (English, Hebrew, German, Chinese, Japanese, Korean) on standardized hardware.
285
+
286
+ ## Documentation
287
+
288
+ ### Quick Links
289
+
290
+ - [Installation Guide](https://goldziher.github.io/kreuzberg/getting-started/installation/) - Setup and dependencies
291
+ - [User Guide](https://goldziher.github.io/kreuzberg/user-guide/) - Comprehensive usage guide
292
+ - [Performance Analysis](https://goldziher.github.io/kreuzberg/performance-analysis/) - Detailed benchmark results
293
+ - [API Reference](https://goldziher.github.io/kreuzberg/api-reference/) - Complete API documentation
294
+ - [Docker Guide](https://goldziher.github.io/kreuzberg/user-guide/docker/) - Container deployment
295
+ - [REST API](https://goldziher.github.io/kreuzberg/user-guide/api-server/) - HTTP endpoints
296
+ - [CLI Guide](https://goldziher.github.io/kreuzberg/cli/) - Command-line usage
297
+ - [OCR Configuration](https://goldziher.github.io/kreuzberg/user-guide/ocr-configuration/) - OCR engine setup
298
+
299
+ ## License
300
+
301
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,53 @@
1
+ kreuzberg/__init__.py,sha256=wVxbug-w1cO2xHcP04Bf6QeIKmT2Ep6aeenb8EOYLA0,1534
2
+ kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
3
+ kreuzberg/_chunker.py,sha256=2eHSRHcZdJ2ZjR3in49y3o9tPl5HMO3vkbnMqaVCbHI,1887
4
+ kreuzberg/_cli_config.py,sha256=WD_seFjbuay_NJv77vGLBW6BVV9WZNujdzf3zQkhzPc,5691
5
+ kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
+ kreuzberg/_entity_extraction.py,sha256=nqpQPmR2Rf1vOwoQsjm22nPLDIcsXdYfMwCL3h8iUTQ,7802
7
+ kreuzberg/_gmft.py,sha256=Heovj2n2kgi7eHtvvRzpBgSLGyXjz8M9PAQMX-npd40,25295
8
+ kreuzberg/_language_detection.py,sha256=22-uXoOu_ws0K8Hz2M7U_SF9QX3npRYLhntAE1dNLFU,3283
9
+ kreuzberg/_mime_types.py,sha256=OhJ6gEyyLHjyvRtkk37zyLFBsRcSd_QybBaV8TxinIg,8471
10
+ kreuzberg/_playa.py,sha256=rU6ii2Qnrj8tkDYlSiab5h-BCYLJnUg4QwSLVDEXF5g,11883
11
+ kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
12
+ kreuzberg/_types.py,sha256=R_0Xc2kq4nEwkruvkB3qfrLeJ996419hBQ_1C6Xrqjo,13388
13
+ kreuzberg/cli.py,sha256=H9xxh4-zhGLfbhya2iD-NcEs-BvajVttm6cSiNx3ANU,12452
14
+ kreuzberg/exceptions.py,sha256=xRaiJh11i8E6Nc-gAQPgNW5xvhiiFBhRS-CBbCEbHQM,2881
15
+ kreuzberg/extraction.py,sha256=hY5d4oelwocX6eOBF0Bu3nHCcCbTL5JOIbaPCCFNKsU,16972
16
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/main.py,sha256=kZCMPPzP4BGzEege9pdhQTJPKKVjCaC6kZdMMeaqP2M,2599
19
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ kreuzberg/_extractors/_base.py,sha256=yNVQSECFad-8_MjqpQZ4q0jQoNdzP6-tqw6l3TfgsMc,4418
21
+ kreuzberg/_extractors/_email.py,sha256=6-Mk1TRXPyy9ylWKCpgdrogyzhiFnJOTuTRld1ghO8I,5695
22
+ kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
23
+ kreuzberg/_extractors/_image.py,sha256=eZ7mR4F-mTwYwUzd70xrY7SZYZrNiDxnP5bYDY5P75U,4455
24
+ kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
25
+ kreuzberg/_extractors/_pdf.py,sha256=Deb1ZIcqDY18CHa7cJL4vO4S7gy09yXWNSuH7O7kSzY,16430
26
+ kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
27
+ kreuzberg/_extractors/_spread_sheet.py,sha256=Nvyz7XT7C2ai4QeUashBeENQpuP5rs8SmKfumxEqlCg,13712
28
+ kreuzberg/_extractors/_structured.py,sha256=i3jAvhHZt_BsRGgZZfgcsUqlwAg_RNc8vsuecb04T0c,5581
29
+ kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
30
+ kreuzberg/_mcp/server.py,sha256=BQHeKI89aKf24BIE4n6m8r1rVA1Zgt6vM8Ki_OHuGnc,6780
31
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
32
+ kreuzberg/_ocr/_base.py,sha256=CUzYMsJjCqCmHzWckmDeIB2L5hd261xrPrK8Ql-Gdm0,3876
33
+ kreuzberg/_ocr/_easyocr.py,sha256=sWyVnF7My4F1GU-IPSVtpaDJPYogw8N-NYxwuy-6loc,17098
34
+ kreuzberg/_ocr/_paddleocr.py,sha256=nXfQq6t2a7O-IpbCZRv8BvzP_lEBLgyYwXI5-wjzec0,17480
35
+ kreuzberg/_ocr/_tesseract.py,sha256=RjJ_C8c74LmLN53sdDo8WPCpUYeJ6fmRwsQdp6dJYio,31490
36
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ kreuzberg/_utils/_cache.py,sha256=6T2K9BXWaPkEKphSFrfXtFFE7ck5q9CYV9NmAFS56e4,15204
38
+ kreuzberg/_utils/_device.py,sha256=rnaSSB5ibf2wr7EDxrcmOUZ4Ocor0pHkwb3N1pC46EY,10276
39
+ kreuzberg/_utils/_document_cache.py,sha256=z8irioKsOu8xve1YgHatm__wIFvs9I1gDK3tLNsNyqM,6926
40
+ kreuzberg/_utils/_errors.py,sha256=4OseKJI5qscD9jHxpP8CtpPWNHAOdhrJwcg6dlQl2fk,6310
41
+ kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
42
+ kreuzberg/_utils/_process_pool.py,sha256=4BqhmRspwMyPT2EBfTu_rrn7v722wlMLD8qlYvYsc00,8621
43
+ kreuzberg/_utils/_quality.py,sha256=dgFLt40NSqB8Ciej5QcZQLiV4U7LcrGux0vXckiE31U,7568
44
+ kreuzberg/_utils/_serialization.py,sha256=Rt5zSkvzf1SVNDrI6F2Zvnkel24mQkD1QvP0WjgZUgk,2195
45
+ kreuzberg/_utils/_string.py,sha256=5YKu9EZlZQ-LkphXUq8fdwKQrX9jWACFEhMGfjIysf4,6381
46
+ kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
47
+ kreuzberg/_utils/_table.py,sha256=C2skLtcyczxDEH33Qw2dOwnR15SGillvNEP-NzBG3R8,8156
48
+ kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
49
+ kreuzberg-3.8.1.dist-info/METADATA,sha256=IqJ6RTcFlwkMN6JZIkb9c8O4rgTrPqIuzXWerD6He1I,11507
50
+ kreuzberg-3.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
51
+ kreuzberg-3.8.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
52
+ kreuzberg-3.8.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
53
+ kreuzberg-3.8.1.dist-info/RECORD,,
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  kreuzberg = kreuzberg.cli:cli
3
+ kreuzberg-mcp = kreuzberg._mcp.server:main
@@ -1,6 +0,0 @@
1
- """Multiprocessing utilities for kreuzberg."""
2
-
3
- from .process_manager import ProcessPoolManager
4
- from .tesseract_pool import TesseractProcessPool
5
-
6
- __all__ = ["ProcessPoolManager", "TesseractProcessPool"]