kreuzberg 3.3.0__py3-none-any.whl → 3.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/__init__.py +9 -2
- kreuzberg/_api/__init__.py +0 -0
- kreuzberg/_api/main.py +87 -0
- kreuzberg/_entity_extraction.py +238 -0
- kreuzberg/_extractors/_base.py +39 -1
- kreuzberg/_extractors/_email.py +149 -0
- kreuzberg/_extractors/_html.py +15 -3
- kreuzberg/_extractors/_image.py +27 -22
- kreuzberg/_extractors/_pandoc.py +3 -14
- kreuzberg/_extractors/_pdf.py +97 -34
- kreuzberg/_extractors/_presentation.py +62 -10
- kreuzberg/_extractors/_spread_sheet.py +181 -6
- kreuzberg/_extractors/_structured.py +148 -0
- kreuzberg/_gmft.py +318 -11
- kreuzberg/_language_detection.py +95 -0
- kreuzberg/_mcp/__init__.py +5 -0
- kreuzberg/_mcp/server.py +227 -0
- kreuzberg/_mime_types.py +27 -1
- kreuzberg/_ocr/__init__.py +10 -1
- kreuzberg/_ocr/_base.py +59 -0
- kreuzberg/_ocr/_easyocr.py +92 -1
- kreuzberg/_ocr/_paddleocr.py +89 -0
- kreuzberg/_ocr/_tesseract.py +569 -5
- kreuzberg/_registry.py +4 -0
- kreuzberg/_types.py +181 -4
- kreuzberg/_utils/_cache.py +52 -4
- kreuzberg/_utils/_device.py +2 -2
- kreuzberg/_utils/_errors.py +3 -7
- kreuzberg/_utils/_process_pool.py +182 -9
- kreuzberg/_utils/_quality.py +237 -0
- kreuzberg/_utils/_serialization.py +4 -2
- kreuzberg/_utils/_string.py +153 -10
- kreuzberg/_utils/_sync.py +6 -7
- kreuzberg/_utils/_table.py +261 -0
- kreuzberg/_utils/_tmp.py +2 -2
- kreuzberg/cli.py +1 -2
- kreuzberg/extraction.py +43 -34
- kreuzberg-3.8.1.dist-info/METADATA +301 -0
- kreuzberg-3.8.1.dist-info/RECORD +53 -0
- {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/entry_points.txt +1 -0
- kreuzberg/_multiprocessing/__init__.py +0 -6
- kreuzberg/_multiprocessing/gmft_isolated.py +0 -332
- kreuzberg/_multiprocessing/process_manager.py +0 -188
- kreuzberg/_multiprocessing/sync_tesseract.py +0 -261
- kreuzberg/_multiprocessing/tesseract_pool.py +0 -359
- kreuzberg-3.3.0.dist-info/METADATA +0 -235
- kreuzberg-3.3.0.dist-info/RECORD +0 -48
- {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/WHEEL +0 -0
- {kreuzberg-3.3.0.dist-info → kreuzberg-3.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,301 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: kreuzberg
|
3
|
+
Version: 3.8.1
|
4
|
+
Summary: Advanced document intelligence framework for extracting structured content from PDFs, images, and office documents
|
5
|
+
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
6
|
+
Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
|
7
|
+
License: MIT
|
8
|
+
License-File: LICENSE
|
9
|
+
Keywords: automation,content-extraction,data-processing,document-analysis,document-intelligence,document-processing,entity-extraction,image-to-text,information-extraction,ocr,pdf-extraction,rag,structured-data,table-extraction,text-extraction
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: Intended Audience :: Information Technology
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
15
|
+
Classifier: Operating System :: OS Independent
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
21
|
+
Classifier: Topic :: Database
|
22
|
+
Classifier: Topic :: Multimedia :: Graphics :: Capture :: Scanners
|
23
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
27
|
+
Classifier: Topic :: Text Processing :: General
|
28
|
+
Classifier: Typing :: Typed
|
29
|
+
Requires-Python: >=3.10
|
30
|
+
Requires-Dist: anyio>=4.9.0
|
31
|
+
Requires-Dist: chardetng-py>=0.3.4
|
32
|
+
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
33
|
+
Requires-Dist: html-to-markdown[lxml]>=1.8.0
|
34
|
+
Requires-Dist: mcp>=1.11.0
|
35
|
+
Requires-Dist: msgspec>=0.18.0
|
36
|
+
Requires-Dist: playa-pdf>=0.6.1
|
37
|
+
Requires-Dist: psutil>=7.0.0
|
38
|
+
Requires-Dist: pypdfium2==4.30.0
|
39
|
+
Requires-Dist: python-calamine>=0.3.2
|
40
|
+
Requires-Dist: python-pptx>=1.0.2
|
41
|
+
Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
|
42
|
+
Provides-Extra: additional-extensions
|
43
|
+
Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
|
44
|
+
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
|
45
|
+
Provides-Extra: all
|
46
|
+
Requires-Dist: click>=8.2.1; extra == 'all'
|
47
|
+
Requires-Dist: easyocr>=1.7.2; extra == 'all'
|
48
|
+
Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
|
49
|
+
Requires-Dist: gmft>=0.4.2; extra == 'all'
|
50
|
+
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
51
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all'
|
52
|
+
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
53
|
+
Requires-Dist: paddleocr>=3.1.0; extra == 'all'
|
54
|
+
Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
|
55
|
+
Requires-Dist: rich>=14.0.0; extra == 'all'
|
56
|
+
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
57
|
+
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
58
|
+
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
59
|
+
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
|
60
|
+
Provides-Extra: api
|
61
|
+
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'api'
|
62
|
+
Provides-Extra: chunking
|
63
|
+
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
|
64
|
+
Provides-Extra: cli
|
65
|
+
Requires-Dist: click>=8.2.1; extra == 'cli'
|
66
|
+
Requires-Dist: rich>=14.0.0; extra == 'cli'
|
67
|
+
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
|
68
|
+
Provides-Extra: easyocr
|
69
|
+
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
70
|
+
Provides-Extra: entity-extraction
|
71
|
+
Requires-Dist: keybert>=0.9.0; extra == 'entity-extraction'
|
72
|
+
Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
|
73
|
+
Provides-Extra: gmft
|
74
|
+
Requires-Dist: gmft>=0.4.2; extra == 'gmft'
|
75
|
+
Provides-Extra: langdetect
|
76
|
+
Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
|
77
|
+
Provides-Extra: paddleocr
|
78
|
+
Requires-Dist: paddleocr>=3.1.0; extra == 'paddleocr'
|
79
|
+
Requires-Dist: paddlepaddle>=3.1.0; extra == 'paddleocr'
|
80
|
+
Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
|
81
|
+
Description-Content-Type: text/markdown
|
82
|
+
|
83
|
+
# Kreuzberg
|
84
|
+
|
85
|
+
[Discord](https://discord.gg/pXxagNK2zN)
|
86
|
+
[PyPI version](https://badge.fury.io/py/kreuzberg)
|
87
|
+
[Documentation](https://goldziher.github.io/kreuzberg/)
|
88
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
89
|
+
[GitHub](https://github.com/Goldziher/kreuzberg)
|
90
|
+
|
91
|
+
**Advanced Document Intelligence for Modern Python Applications.** Transform PDFs, images, and office documents into structured data with production-grade performance. Built by engineers who understand that speed, reliability, and developer experience matter.
|
92
|
+
|
93
|
+
📖 **[Complete Documentation](https://goldziher.github.io/kreuzberg/)**
|
94
|
+
|
95
|
+
## Why Choose Kreuzberg?
|
96
|
+
|
97
|
+
### ⚡ Proven Performance
|
98
|
+
|
99
|
+
[Benchmarked](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) 6-126x faster than alternatives while using minimal resources. Process up to 14 files per second with 87MB install size and ~360MB memory usage. Optimized for production workloads and resource-constrained environments.
|
100
|
+
|
101
|
+
### 🏗️ Production Engineering
|
102
|
+
|
103
|
+
Comprehensive test coverage (95%+), robust error handling, and true async/await support. Built with modern Python practices for reliability in production environments.
|
104
|
+
|
105
|
+
### 🔧 Developer Experience
|
106
|
+
|
107
|
+
Works immediately with smart defaults and scales as you grow. Native MCP integration for AI tools, full type safety, and clear documentation.
|
108
|
+
|
109
|
+
### 🚀 Flexible Deployment
|
110
|
+
|
111
|
+
Deploy on serverless platforms, containers, or traditional servers. Supports both CPU and GPU processing (via PaddleOCR and EasyOCR). No external API dependencies. Multiple deployment modes: CLI, REST API, MCP server.
|
112
|
+
|
113
|
+
### 📄 Comprehensive Format Support
|
114
|
+
|
115
|
+
Extract from PDFs, images, Office documents, HTML, spreadsheets, and presentations. Multiple OCR engines with intelligent fallbacks, table extraction, and content preparation for RAG workflows.
|
116
|
+
|
117
|
+
## Quick Start
|
118
|
+
|
119
|
+
### Installation
|
120
|
+
|
121
|
+
```bash
|
122
|
+
# Basic installation
|
123
|
+
pip install kreuzberg
|
124
|
+
|
125
|
+
# With optional features
|
126
|
+
pip install "kreuzberg[cli,api]" # CLI + REST API
|
127
|
+
pip install "kreuzberg[easyocr,gmft]" # EasyOCR + table extraction
|
128
|
+
pip install "kreuzberg[all]" # Everything
|
129
|
+
```
|
130
|
+
|
131
|
+
### System Dependencies
|
132
|
+
|
133
|
+
```bash
|
134
|
+
# Ubuntu/Debian
|
135
|
+
sudo apt-get install tesseract-ocr pandoc
|
136
|
+
|
137
|
+
# macOS
|
138
|
+
brew install tesseract pandoc
|
139
|
+
|
140
|
+
# Windows
|
141
|
+
choco install tesseract pandoc
|
142
|
+
```
|
143
|
+
|
144
|
+
### Basic Usage
|
145
|
+
|
146
|
+
```python
|
147
|
+
import asyncio
|
148
|
+
from kreuzberg import extract_file
|
149
|
+
|
150
|
+
async def main():
|
151
|
+
    # Extract content from files
|
152
|
+
    result = await extract_file("document.pdf")
|
153
|
+
    print(result.content)
|
154
|
+
    print(result.metadata)
|
155
|
+
|
156
|
+
asyncio.run(main())
|
157
|
+
```
|
158
|
+
|
159
|
+
## Deployment Options
|
160
|
+
|
161
|
+
### 🤖 MCP Server (AI Integration)
|
162
|
+
|
163
|
+
**Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
|
164
|
+
|
165
|
+
```bash
|
166
|
+
# Install and run MCP server with all features (recommended)
|
167
|
+
pip install "kreuzberg[all]"
|
168
|
+
kreuzberg-mcp
|
169
|
+
|
170
|
+
# Or with uvx (recommended for Claude Desktop)
|
171
|
+
uvx --with "kreuzberg[all]" kreuzberg-mcp
|
172
|
+
|
173
|
+
# Basic installation (core features only)
|
174
|
+
pip install kreuzberg
|
175
|
+
kreuzberg-mcp
|
176
|
+
```
|
177
|
+
|
178
|
+
**Configure in Claude Desktop (`claude_desktop_config.json`):**
|
179
|
+
|
180
|
+
```json
|
181
|
+
{
|
182
|
+
"mcpServers": {
|
183
|
+
"kreuzberg": {
|
184
|
+
"command": "uvx",
|
185
|
+
"args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
|
186
|
+
}
|
187
|
+
}
|
188
|
+
}
|
189
|
+
```
|
190
|
+
|
191
|
+
**Basic configuration (core features only):**
|
192
|
+
|
193
|
+
```json
|
194
|
+
{
|
195
|
+
"mcpServers": {
|
196
|
+
"kreuzberg": {
|
197
|
+
"command": "uvx",
|
198
|
+
"args": ["kreuzberg-mcp"]
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
```
|
203
|
+
|
204
|
+
**Available MCP capabilities:**
|
205
|
+
|
206
|
+
- **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
|
207
|
+
- **Resources**: Configuration, supported formats, OCR backends
|
208
|
+
- **Prompts**: Extract-and-summarize, structured analysis workflows
|
209
|
+
|
210
|
+
### 🐳 Docker (Recommended)
|
211
|
+
|
212
|
+
```bash
|
213
|
+
# Run API server
|
214
|
+
docker run -p 8000:8000 goldziher/kreuzberg:latest
|
215
|
+
|
216
|
+
# Extract files
|
217
|
+
curl -X POST http://localhost:8000/extract -F "data=@document.pdf"
|
218
|
+
```
|
219
|
+
|
220
|
+
Available variants: `latest`, `v3.8.0`, `v3.8.0-easyocr`, `v3.8.0-paddle`, `v3.8.0-gmft`, `v3.8.0-all`
|
221
|
+
|
222
|
+
### 🌐 REST API
|
223
|
+
|
224
|
+
```bash
|
225
|
+
# Install and run
|
226
|
+
pip install "kreuzberg[api]"
|
227
|
+
litestar --app kreuzberg._api.main:app run
|
228
|
+
|
229
|
+
# Health check
|
230
|
+
curl http://localhost:8000/health
|
231
|
+
|
232
|
+
# Extract files
|
233
|
+
curl -X POST http://localhost:8000/extract -F "data=@file.pdf"
|
234
|
+
```
|
235
|
+
|
236
|
+
### 💻 Command Line
|
237
|
+
|
238
|
+
```bash
|
239
|
+
# Install CLI
|
240
|
+
pip install "kreuzberg[cli]"
|
241
|
+
|
242
|
+
# Extract to stdout
|
243
|
+
kreuzberg extract document.pdf
|
244
|
+
|
245
|
+
# JSON output with metadata
|
246
|
+
kreuzberg extract document.pdf --output-format json --show-metadata
|
247
|
+
|
248
|
+
# Batch processing
|
249
|
+
kreuzberg extract *.pdf --output-dir ./extracted/
|
250
|
+
```
|
251
|
+
|
252
|
+
## Supported Formats
|
253
|
+
|
254
|
+
| Category | Formats |
|
255
|
+
| ----------------- | ------------------------------ |
|
256
|
+
| **Documents** | PDF, DOCX, DOC, RTF, TXT, EPUB |
|
257
|
+
| **Images** | JPG, PNG, TIFF, BMP, GIF, WEBP |
|
258
|
+
| **Spreadsheets** | XLSX, XLS, CSV, ODS |
|
259
|
+
| **Presentations** | PPTX, PPT, ODP |
|
260
|
+
| **Web** | HTML, XML, MHTML |
|
261
|
+
| **Archives** | Supported via extraction |
|
262
|
+
|
263
|
+
## 📊 Performance Comparison
|
264
|
+
|
265
|
+
[Comprehensive benchmarks](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) across ~100 real-world documents • [View source](https://github.com/Goldziher/python-text-extraction-libs-benchmarks) • [**Detailed Analysis**](https://goldziher.github.io/kreuzberg/performance-analysis/):
|
266
|
+
|
267
|
+
| Framework | Speed | Memory | Install Size | Dependencies | Success Rate |
|
268
|
+
| ------------- | ------------ | ------ | ------------ | ------------ | ------------ |
|
269
|
+
| **Kreuzberg** | 14.4 files/s | 360MB | 87MB | 43 | 100% |
|
270
|
+
| Unstructured | ~12 files/s | ~1GB | 146MB | 54 | 88%+ |
|
271
|
+
| MarkItDown | ~15 files/s | ~1.5GB | 251MB | 25 | 80%\* |
|
272
|
+
| Docling | ~1 file/min | ~5GB | 1,032MB | 88 | 45%\* |
|
273
|
+
|
274
|
+
\*_Performance varies significantly with document complexity and size_
|
275
|
+
|
276
|
+
**Key strengths:**
|
277
|
+
|
278
|
+
- 6-126x faster processing than comparable frameworks
|
279
|
+
- Smallest installation footprint and memory usage
|
280
|
+
- Only framework with built-in async/await support
|
281
|
+
- Supports both CPU and GPU processing
|
282
|
+
- Built by software engineers for production reliability
|
283
|
+
|
284
|
+
> **Benchmark details**: Tests include PDFs, Word docs, HTML, images, and spreadsheets in multiple languages (English, Hebrew, German, Chinese, Japanese, Korean) on standardized hardware.
|
285
|
+
|
286
|
+
## Documentation
|
287
|
+
|
288
|
+
### Quick Links
|
289
|
+
|
290
|
+
- [Installation Guide](https://goldziher.github.io/kreuzberg/getting-started/installation/) - Setup and dependencies
|
291
|
+
- [User Guide](https://goldziher.github.io/kreuzberg/user-guide/) - Comprehensive usage guide
|
292
|
+
- [Performance Analysis](https://goldziher.github.io/kreuzberg/performance-analysis/) - Detailed benchmark results
|
293
|
+
- [API Reference](https://goldziher.github.io/kreuzberg/api-reference/) - Complete API documentation
|
294
|
+
- [Docker Guide](https://goldziher.github.io/kreuzberg/user-guide/docker/) - Container deployment
|
295
|
+
- [REST API](https://goldziher.github.io/kreuzberg/user-guide/api-server/) - HTTP endpoints
|
296
|
+
- [CLI Guide](https://goldziher.github.io/kreuzberg/cli/) - Command-line usage
|
297
|
+
- [OCR Configuration](https://goldziher.github.io/kreuzberg/user-guide/ocr-configuration/) - OCR engine setup
|
298
|
+
|
299
|
+
## License
|
300
|
+
|
301
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
@@ -0,0 +1,53 @@
|
|
1
|
+
kreuzberg/__init__.py,sha256=wVxbug-w1cO2xHcP04Bf6QeIKmT2Ep6aeenb8EOYLA0,1534
|
2
|
+
kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
|
3
|
+
kreuzberg/_chunker.py,sha256=2eHSRHcZdJ2ZjR3in49y3o9tPl5HMO3vkbnMqaVCbHI,1887
|
4
|
+
kreuzberg/_cli_config.py,sha256=WD_seFjbuay_NJv77vGLBW6BVV9WZNujdzf3zQkhzPc,5691
|
5
|
+
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
+
kreuzberg/_entity_extraction.py,sha256=nqpQPmR2Rf1vOwoQsjm22nPLDIcsXdYfMwCL3h8iUTQ,7802
|
7
|
+
kreuzberg/_gmft.py,sha256=Heovj2n2kgi7eHtvvRzpBgSLGyXjz8M9PAQMX-npd40,25295
|
8
|
+
kreuzberg/_language_detection.py,sha256=22-uXoOu_ws0K8Hz2M7U_SF9QX3npRYLhntAE1dNLFU,3283
|
9
|
+
kreuzberg/_mime_types.py,sha256=OhJ6gEyyLHjyvRtkk37zyLFBsRcSd_QybBaV8TxinIg,8471
|
10
|
+
kreuzberg/_playa.py,sha256=rU6ii2Qnrj8tkDYlSiab5h-BCYLJnUg4QwSLVDEXF5g,11883
|
11
|
+
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
12
|
+
kreuzberg/_types.py,sha256=R_0Xc2kq4nEwkruvkB3qfrLeJ996419hBQ_1C6Xrqjo,13388
|
13
|
+
kreuzberg/cli.py,sha256=H9xxh4-zhGLfbhya2iD-NcEs-BvajVttm6cSiNx3ANU,12452
|
14
|
+
kreuzberg/exceptions.py,sha256=xRaiJh11i8E6Nc-gAQPgNW5xvhiiFBhRS-CBbCEbHQM,2881
|
15
|
+
kreuzberg/extraction.py,sha256=hY5d4oelwocX6eOBF0Bu3nHCcCbTL5JOIbaPCCFNKsU,16972
|
16
|
+
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
kreuzberg/_api/main.py,sha256=kZCMPPzP4BGzEege9pdhQTJPKKVjCaC6kZdMMeaqP2M,2599
|
19
|
+
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
+
kreuzberg/_extractors/_base.py,sha256=yNVQSECFad-8_MjqpQZ4q0jQoNdzP6-tqw6l3TfgsMc,4418
|
21
|
+
kreuzberg/_extractors/_email.py,sha256=6-Mk1TRXPyy9ylWKCpgdrogyzhiFnJOTuTRld1ghO8I,5695
|
22
|
+
kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
|
23
|
+
kreuzberg/_extractors/_image.py,sha256=eZ7mR4F-mTwYwUzd70xrY7SZYZrNiDxnP5bYDY5P75U,4455
|
24
|
+
kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
|
25
|
+
kreuzberg/_extractors/_pdf.py,sha256=Deb1ZIcqDY18CHa7cJL4vO4S7gy09yXWNSuH7O7kSzY,16430
|
26
|
+
kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
|
27
|
+
kreuzberg/_extractors/_spread_sheet.py,sha256=Nvyz7XT7C2ai4QeUashBeENQpuP5rs8SmKfumxEqlCg,13712
|
28
|
+
kreuzberg/_extractors/_structured.py,sha256=i3jAvhHZt_BsRGgZZfgcsUqlwAg_RNc8vsuecb04T0c,5581
|
29
|
+
kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
|
30
|
+
kreuzberg/_mcp/server.py,sha256=BQHeKI89aKf24BIE4n6m8r1rVA1Zgt6vM8Ki_OHuGnc,6780
|
31
|
+
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
32
|
+
kreuzberg/_ocr/_base.py,sha256=CUzYMsJjCqCmHzWckmDeIB2L5hd261xrPrK8Ql-Gdm0,3876
|
33
|
+
kreuzberg/_ocr/_easyocr.py,sha256=sWyVnF7My4F1GU-IPSVtpaDJPYogw8N-NYxwuy-6loc,17098
|
34
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=nXfQq6t2a7O-IpbCZRv8BvzP_lEBLgyYwXI5-wjzec0,17480
|
35
|
+
kreuzberg/_ocr/_tesseract.py,sha256=RjJ_C8c74LmLN53sdDo8WPCpUYeJ6fmRwsQdp6dJYio,31490
|
36
|
+
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
+
kreuzberg/_utils/_cache.py,sha256=6T2K9BXWaPkEKphSFrfXtFFE7ck5q9CYV9NmAFS56e4,15204
|
38
|
+
kreuzberg/_utils/_device.py,sha256=rnaSSB5ibf2wr7EDxrcmOUZ4Ocor0pHkwb3N1pC46EY,10276
|
39
|
+
kreuzberg/_utils/_document_cache.py,sha256=z8irioKsOu8xve1YgHatm__wIFvs9I1gDK3tLNsNyqM,6926
|
40
|
+
kreuzberg/_utils/_errors.py,sha256=4OseKJI5qscD9jHxpP8CtpPWNHAOdhrJwcg6dlQl2fk,6310
|
41
|
+
kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
|
42
|
+
kreuzberg/_utils/_process_pool.py,sha256=4BqhmRspwMyPT2EBfTu_rrn7v722wlMLD8qlYvYsc00,8621
|
43
|
+
kreuzberg/_utils/_quality.py,sha256=dgFLt40NSqB8Ciej5QcZQLiV4U7LcrGux0vXckiE31U,7568
|
44
|
+
kreuzberg/_utils/_serialization.py,sha256=Rt5zSkvzf1SVNDrI6F2Zvnkel24mQkD1QvP0WjgZUgk,2195
|
45
|
+
kreuzberg/_utils/_string.py,sha256=5YKu9EZlZQ-LkphXUq8fdwKQrX9jWACFEhMGfjIysf4,6381
|
46
|
+
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
47
|
+
kreuzberg/_utils/_table.py,sha256=C2skLtcyczxDEH33Qw2dOwnR15SGillvNEP-NzBG3R8,8156
|
48
|
+
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
49
|
+
kreuzberg-3.8.1.dist-info/METADATA,sha256=IqJ6RTcFlwkMN6JZIkb9c8O4rgTrPqIuzXWerD6He1I,11507
|
50
|
+
kreuzberg-3.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
51
|
+
kreuzberg-3.8.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
52
|
+
kreuzberg-3.8.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
53
|
+
kreuzberg-3.8.1.dist-info/RECORD,,
|