kreuzberg 3.8.0__py3-none-any.whl → 3.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/__init__.py +4 -0
- kreuzberg/_api/main.py +22 -1
- kreuzberg/_config.py +404 -0
- kreuzberg/_entity_extraction.py +4 -5
- kreuzberg/_extractors/_base.py +3 -5
- kreuzberg/_extractors/_image.py +18 -32
- kreuzberg/_extractors/_pandoc.py +3 -14
- kreuzberg/_extractors/_pdf.py +39 -57
- kreuzberg/_extractors/_spread_sheet.py +2 -3
- kreuzberg/_extractors/_structured.py +10 -7
- kreuzberg/_gmft.py +314 -10
- kreuzberg/_language_detection.py +1 -1
- kreuzberg/_mcp/server.py +58 -8
- kreuzberg/_ocr/__init__.py +1 -22
- kreuzberg/_ocr/_base.py +59 -0
- kreuzberg/_ocr/_easyocr.py +92 -1
- kreuzberg/_ocr/_paddleocr.py +90 -1
- kreuzberg/_ocr/_tesseract.py +556 -5
- kreuzberg/_playa.py +2 -3
- kreuzberg/_types.py +46 -24
- kreuzberg/_utils/_cache.py +35 -4
- kreuzberg/_utils/_device.py +10 -20
- kreuzberg/_utils/_errors.py +44 -45
- kreuzberg/_utils/_process_pool.py +2 -6
- kreuzberg/_utils/_quality.py +7 -11
- kreuzberg/_utils/_serialization.py +21 -16
- kreuzberg/_utils/_string.py +22 -12
- kreuzberg/_utils/_table.py +3 -4
- kreuzberg/cli.py +4 -5
- kreuzberg/exceptions.py +10 -0
- kreuzberg/extraction.py +6 -24
- kreuzberg-3.8.2.dist-info/METADATA +265 -0
- kreuzberg-3.8.2.dist-info/RECORD +53 -0
- kreuzberg/_cli_config.py +0 -175
- kreuzberg/_multiprocessing/__init__.py +0 -5
- kreuzberg/_multiprocessing/gmft_isolated.py +0 -330
- kreuzberg/_ocr/_pool.py +0 -357
- kreuzberg/_ocr/_sync.py +0 -566
- kreuzberg-3.8.0.dist-info/METADATA +0 -313
- kreuzberg-3.8.0.dist-info/RECORD +0 -57
- {kreuzberg-3.8.0.dist-info → kreuzberg-3.8.2.dist-info}/WHEEL +0 -0
- {kreuzberg-3.8.0.dist-info → kreuzberg-3.8.2.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.8.0.dist-info → kreuzberg-3.8.2.dist-info}/licenses/LICENSE +0 -0
kreuzberg-3.8.0.dist-info/METADATA
DELETED
@@ -1,313 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: kreuzberg
|
3
|
-
Version: 3.8.0
|
4
|
-
Summary: A text extraction library supporting PDFs, images, office documents and more
|
5
|
-
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
6
|
-
Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
|
7
|
-
License: MIT
|
8
|
-
License-File: LICENSE
|
9
|
-
Keywords: document-processing,entity-extraction,image-to-text,keyword-extraction,named-entity-recognition,ner,ocr,pandoc,pdf-extraction,rag,spacy,table-extraction,tesseract,text-extraction,text-processing
|
10
|
-
Classifier: Development Status :: 5 - Production/Stable
|
11
|
-
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
13
|
-
Classifier: Operating System :: OS Independent
|
14
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
18
|
-
Classifier: Programming Language :: Python :: 3.13
|
19
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
21
|
-
Classifier: Topic :: Text Processing :: General
|
22
|
-
Classifier: Topic :: Utilities
|
23
|
-
Classifier: Typing :: Typed
|
24
|
-
Requires-Python: >=3.10
|
25
|
-
Requires-Dist: anyio>=4.9.0
|
26
|
-
Requires-Dist: chardetng-py>=0.3.4
|
27
|
-
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
28
|
-
Requires-Dist: html-to-markdown[lxml]>=1.8.0
|
29
|
-
Requires-Dist: mcp>=1.11.0
|
30
|
-
Requires-Dist: msgspec>=0.18.0
|
31
|
-
Requires-Dist: playa-pdf>=0.6.1
|
32
|
-
Requires-Dist: psutil>=7.0.0
|
33
|
-
Requires-Dist: pypdfium2==4.30.0
|
34
|
-
Requires-Dist: python-calamine>=0.3.2
|
35
|
-
Requires-Dist: python-pptx>=1.0.2
|
36
|
-
Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
|
37
|
-
Provides-Extra: additional-extensions
|
38
|
-
Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
|
39
|
-
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
|
40
|
-
Provides-Extra: all
|
41
|
-
Requires-Dist: click>=8.2.1; extra == 'all'
|
42
|
-
Requires-Dist: easyocr>=1.7.2; extra == 'all'
|
43
|
-
Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
|
44
|
-
Requires-Dist: gmft>=0.4.2; extra == 'all'
|
45
|
-
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
46
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all'
|
47
|
-
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
48
|
-
Requires-Dist: paddleocr>=3.1.0; extra == 'all'
|
49
|
-
Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
|
50
|
-
Requires-Dist: rich>=14.0.0; extra == 'all'
|
51
|
-
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
52
|
-
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
53
|
-
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
54
|
-
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
|
55
|
-
Provides-Extra: api
|
56
|
-
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'api'
|
57
|
-
Provides-Extra: chunking
|
58
|
-
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
|
59
|
-
Provides-Extra: cli
|
60
|
-
Requires-Dist: click>=8.2.1; extra == 'cli'
|
61
|
-
Requires-Dist: rich>=14.0.0; extra == 'cli'
|
62
|
-
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
|
63
|
-
Provides-Extra: easyocr
|
64
|
-
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
65
|
-
Provides-Extra: entity-extraction
|
66
|
-
Requires-Dist: keybert>=0.9.0; extra == 'entity-extraction'
|
67
|
-
Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
|
68
|
-
Provides-Extra: gmft
|
69
|
-
Requires-Dist: gmft>=0.4.2; extra == 'gmft'
|
70
|
-
Provides-Extra: langdetect
|
71
|
-
Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
|
72
|
-
Provides-Extra: paddleocr
|
73
|
-
Requires-Dist: paddleocr>=3.1.0; extra == 'paddleocr'
|
74
|
-
Requires-Dist: paddlepaddle>=3.1.0; extra == 'paddleocr'
|
75
|
-
Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
|
76
|
-
Description-Content-Type: text/markdown
|
77
|
-
|
78
|
-
# Kreuzberg
|
79
|
-
|
80
|
-
[Discord](https://discord.gg/pXxagNK2zN)
|
81
|
-
[PyPI version](https://badge.fury.io/py/kreuzberg)
|
82
|
-
[Documentation](https://goldziher.github.io/kreuzberg/)
|
83
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
84
|
-
[GitHub repository](https://github.com/Goldziher/kreuzberg)
|
85
|
-
|
86
|
-
**High-performance Open Source Document Intelligence framework for Python.** Built by engineers for production workloads - extract text from any document with excellent performance and minimal complexity.
|
87
|
-
|
88
|
-
📖 **[Complete Documentation](https://goldziher.github.io/kreuzberg/)**
|
89
|
-
|
90
|
-
## Why Choose Kreuzberg?
|
91
|
-
|
92
|
-
### 🚀 Performance
|
93
|
-
|
94
|
-
- [Benchmarked as the fastest framework](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) - 2-3x faster than the nearest alternatives
|
95
|
-
- Minimal footprint: 71MB install vs 1GB+ for competitors
|
96
|
-
- Lowest memory usage (~530MB average) optimized for production workloads
|
97
|
-
- Edge and serverless ready - deploy anywhere without heavy dependencies
|
98
|
-
|
99
|
-
### 🛠️ Engineering Quality
|
100
|
-
|
101
|
-
- Built by software engineers with modern Python best practices
|
102
|
-
- 95%+ test coverage with comprehensive test suite
|
103
|
-
- Thoroughly benchmarked and profiled for real-world performance
|
104
|
-
- Only framework offering true async/await support alongside sync APIs
|
105
|
-
- Robust error handling and detailed logging
|
106
|
-
|
107
|
-
### 🎯 Developer Experience
|
108
|
-
|
109
|
-
- Works out of the box with sane defaults, scales with your needs
|
110
|
-
- Native MCP server for AI tool integration (Claude Desktop, Cursor)
|
111
|
-
- Full type safety with excellent IDE support (completions)
|
112
|
-
- Comprehensive documentation including full API reference
|
113
|
-
|
114
|
-
### 🌍 Deployment Options
|
115
|
-
|
116
|
-
- Docker images for all architectures (AMD64, ARM64)
|
117
|
-
- Cloud native - AWS Lambda, Google Cloud Functions, Azure Functions
|
118
|
-
- CPU-only processing - no GPU requirements, lower energy consumption
|
119
|
-
- 100% local processing - no external API dependencies
|
120
|
-
- Multiple deployment modes: CLI, REST API, MCP server
|
121
|
-
|
122
|
-
### 🎯 Complete Solution
|
123
|
-
|
124
|
-
- Universal format support: PDFs, images, Office docs, HTML, spreadsheets, presentations
|
125
|
-
- Multiple OCR engines: Tesseract, EasyOCR, PaddleOCR with intelligent fallbacks
|
126
|
-
- Advanced features: Table extraction, metadata extraction, content chunking for RAG
|
127
|
-
- Production tools: REST API, CLI tools, batch processing, custom extractors
|
128
|
-
- Fully extensible: Add your own extractors
|
129
|
-
|
130
|
-
## Quick Start
|
131
|
-
|
132
|
-
### Installation
|
133
|
-
|
134
|
-
```bash
|
135
|
-
# Basic installation
|
136
|
-
pip install kreuzberg
|
137
|
-
|
138
|
-
# With optional features
|
139
|
-
pip install "kreuzberg[cli,api]" # CLI + REST API
|
140
|
-
pip install "kreuzberg[easyocr,gmft]" # EasyOCR + table extraction
|
141
|
-
pip install "kreuzberg[all]" # Everything
|
142
|
-
```
|
143
|
-
|
144
|
-
### System Dependencies
|
145
|
-
|
146
|
-
```bash
|
147
|
-
# Ubuntu/Debian
|
148
|
-
sudo apt-get install tesseract-ocr pandoc
|
149
|
-
|
150
|
-
# macOS
|
151
|
-
brew install tesseract pandoc
|
152
|
-
|
153
|
-
# Windows
|
154
|
-
choco install tesseract pandoc
|
155
|
-
```
|
156
|
-
|
157
|
-
### Basic Usage
|
158
|
-
|
159
|
-
```python
|
160
|
-
import asyncio
|
161
|
-
from kreuzberg import extract_file
|
162
|
-
|
163
|
-
async def main():
|
164
|
-
# Extract from any document type
|
165
|
-
result = await extract_file("document.pdf")
|
166
|
-
print(result.content)
|
167
|
-
print(result.metadata)
|
168
|
-
|
169
|
-
asyncio.run(main())
|
170
|
-
```
|
171
|
-
|
172
|
-
## Deployment Options
|
173
|
-
|
174
|
-
### 🤖 MCP Server (AI Integration)
|
175
|
-
|
176
|
-
**Connect directly to Claude Desktop, Cursor, and other AI tools with the Model Context Protocol:**
|
177
|
-
|
178
|
-
```bash
|
179
|
-
# Install and run MCP server with all features (recommended)
|
180
|
-
pip install "kreuzberg[all]"
|
181
|
-
kreuzberg-mcp
|
182
|
-
|
183
|
-
# Or with uvx (recommended for Claude Desktop)
|
184
|
-
uvx --with "kreuzberg[all]" kreuzberg-mcp
|
185
|
-
|
186
|
-
# Basic installation (core features only)
|
187
|
-
pip install kreuzberg
|
188
|
-
kreuzberg-mcp
|
189
|
-
```
|
190
|
-
|
191
|
-
**Configure in Claude Desktop (`claude_desktop_config.json`):**
|
192
|
-
|
193
|
-
```json
|
194
|
-
{
|
195
|
-
"mcpServers": {
|
196
|
-
"kreuzberg": {
|
197
|
-
"command": "uvx",
|
198
|
-
"args": ["--with", "kreuzberg[all]", "kreuzberg-mcp"]
|
199
|
-
}
|
200
|
-
}
|
201
|
-
}
|
202
|
-
```
|
203
|
-
|
204
|
-
**Basic configuration (core features only):**
|
205
|
-
|
206
|
-
```json
|
207
|
-
{
|
208
|
-
"mcpServers": {
|
209
|
-
"kreuzberg": {
|
210
|
-
"command": "uvx",
|
211
|
-
"args": ["kreuzberg-mcp"]
|
212
|
-
}
|
213
|
-
}
|
214
|
-
}
|
215
|
-
```
|
216
|
-
|
217
|
-
**Available MCP capabilities:**
|
218
|
-
|
219
|
-
- **Tools**: `extract_document`, `extract_bytes`, `extract_simple`
|
220
|
-
- **Resources**: Configuration, supported formats, OCR backends
|
221
|
-
- **Prompts**: Extract-and-summarize, structured analysis workflows
|
222
|
-
|
223
|
-
### 🐳 Docker (Recommended)
|
224
|
-
|
225
|
-
```bash
|
226
|
-
# Run API server
|
227
|
-
docker run -p 8000:8000 goldziher/kreuzberg:latest
|
228
|
-
|
229
|
-
# Extract files
|
230
|
-
curl -X POST http://localhost:8000/extract -F "data=@document.pdf"
|
231
|
-
```
|
232
|
-
|
233
|
-
Available variants: `latest`, `v3.8.0`, `v3.8.0-easyocr`, `v3.8.0-paddle`, `v3.8.0-gmft`, `v3.8.0-all`
|
234
|
-
|
235
|
-
### 🌐 REST API
|
236
|
-
|
237
|
-
```bash
|
238
|
-
# Install and run
|
239
|
-
pip install "kreuzberg[api]"
|
240
|
-
litestar --app kreuzberg._api.main:app run
|
241
|
-
|
242
|
-
# Health check
|
243
|
-
curl http://localhost:8000/health
|
244
|
-
|
245
|
-
# Extract files
|
246
|
-
curl -X POST http://localhost:8000/extract -F "data=@file.pdf"
|
247
|
-
```
|
248
|
-
|
249
|
-
### 💻 Command Line
|
250
|
-
|
251
|
-
```bash
|
252
|
-
# Install CLI
|
253
|
-
pip install "kreuzberg[cli]"
|
254
|
-
|
255
|
-
# Extract to stdout
|
256
|
-
kreuzberg extract document.pdf
|
257
|
-
|
258
|
-
# JSON output with metadata
|
259
|
-
kreuzberg extract document.pdf --output-format json --show-metadata
|
260
|
-
|
261
|
-
# Batch processing
|
262
|
-
kreuzberg extract *.pdf --output-dir ./extracted/
|
263
|
-
```
|
264
|
-
|
265
|
-
## Supported Formats
|
266
|
-
|
267
|
-
| Category | Formats |
|
268
|
-
| ----------------- | ------------------------------ |
|
269
|
-
| **Documents** | PDF, DOCX, DOC, RTF, TXT, EPUB |
|
270
|
-
| **Images** | JPG, PNG, TIFF, BMP, GIF, WEBP |
|
271
|
-
| **Spreadsheets** | XLSX, XLS, CSV, ODS |
|
272
|
-
| **Presentations** | PPTX, PPT, ODP |
|
273
|
-
| **Web** | HTML, XML, MHTML |
|
274
|
-
| **Archives** | Support via extraction |
|
275
|
-
|
276
|
-
## 📊 Performance Comparison
|
277
|
-
|
278
|
-
[Comprehensive benchmarks](https://goldziher.github.io/python-text-extraction-libs-benchmarks/) across 94 real-world documents • [View source](https://github.com/Goldziher/python-text-extraction-libs-benchmarks):
|
279
|
-
|
280
|
-
| Framework | Speed | Memory | Install Size | Dependencies | Success Rate |
|
281
|
-
| ------------- | ----------- | ------ | ------------ | ------------ | ------------ |
|
282
|
-
| **Kreuzberg** | 35+ files/s | 530MB | 71MB | 20 | High |
|
283
|
-
| Unstructured | ~12 files/s | ~1GB | 146MB | 54 | 88%+ |
|
284
|
-
| MarkItDown | ~15 files/s | ~1.5GB | 251MB | 25 | 80%\* |
|
285
|
-
| Docling | ~1 file/min | ~5GB | 1,032MB | 88 | 45%\* |
|
286
|
-
|
287
|
-
\*_Performance varies significantly with document complexity and size_
|
288
|
-
|
289
|
-
**Key strengths:**
|
290
|
-
|
291
|
-
- 2-3x faster processing than comparable frameworks
|
292
|
-
- Smallest installation footprint and memory usage
|
293
|
-
- Only framework with built-in async/await support
|
294
|
-
- CPU-only processing - no GPU dependencies
|
295
|
-
- Built by software engineers for production reliability
|
296
|
-
|
297
|
-
> **Benchmark details**: Tests include PDFs, Word docs, HTML, images, and spreadsheets in multiple languages (English, Hebrew, German, Chinese, Japanese, Korean) on standardized hardware.
|
298
|
-
|
299
|
-
## Documentation
|
300
|
-
|
301
|
-
### Quick Links
|
302
|
-
|
303
|
-
- [Installation Guide](https://goldziher.github.io/kreuzberg/getting-started/installation/) - Setup and dependencies
|
304
|
-
- [User Guide](https://goldziher.github.io/kreuzberg/user-guide/) - Comprehensive usage guide
|
305
|
-
- [API Reference](https://goldziher.github.io/kreuzberg/api-reference/) - Complete API documentation
|
306
|
-
- [Docker Guide](https://goldziher.github.io/kreuzberg/user-guide/docker/) - Container deployment
|
307
|
-
- [REST API](https://goldziher.github.io/kreuzberg/user-guide/api-server/) - HTTP endpoints
|
308
|
-
- [CLI Guide](https://goldziher.github.io/kreuzberg/cli/) - Command-line usage
|
309
|
-
- [OCR Configuration](https://goldziher.github.io/kreuzberg/user-guide/ocr-configuration/) - OCR engine setup
|
310
|
-
|
311
|
-
## License
|
312
|
-
|
313
|
-
MIT License - see [LICENSE](LICENSE) for details.
|
kreuzberg-3.8.0.dist-info/RECORD
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=wVxbug-w1cO2xHcP04Bf6QeIKmT2Ep6aeenb8EOYLA0,1534
|
2
|
-
kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
|
3
|
-
kreuzberg/_chunker.py,sha256=2eHSRHcZdJ2ZjR3in49y3o9tPl5HMO3vkbnMqaVCbHI,1887
|
4
|
-
kreuzberg/_cli_config.py,sha256=WD_seFjbuay_NJv77vGLBW6BVV9WZNujdzf3zQkhzPc,5691
|
5
|
-
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_entity_extraction.py,sha256=EIasBGpkZ-3FwivjEpisz23LilTwx8os-IbfrDtzNl4,7815
|
7
|
-
kreuzberg/_gmft.py,sha256=ZIEUu4Uy5zYNFEeDRbz1cLJhnCAStVsSzm1PQ3vDeO8,14828
|
8
|
-
kreuzberg/_language_detection.py,sha256=22-uXoOu_ws0K8Hz2M7U_SF9QX3npRYLhntAE1dNLFU,3283
|
9
|
-
kreuzberg/_mime_types.py,sha256=OhJ6gEyyLHjyvRtkk37zyLFBsRcSd_QybBaV8TxinIg,8471
|
10
|
-
kreuzberg/_playa.py,sha256=rU6ii2Qnrj8tkDYlSiab5h-BCYLJnUg4QwSLVDEXF5g,11883
|
11
|
-
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
12
|
-
kreuzberg/_types.py,sha256=R_0Xc2kq4nEwkruvkB3qfrLeJ996419hBQ_1C6Xrqjo,13388
|
13
|
-
kreuzberg/cli.py,sha256=S0w2nGXBWPFn1NhxppW7dpUwB9f_3ymFuWSAB2aRu9g,12465
|
14
|
-
kreuzberg/exceptions.py,sha256=xRaiJh11i8E6Nc-gAQPgNW5xvhiiFBhRS-CBbCEbHQM,2881
|
15
|
-
kreuzberg/extraction.py,sha256=mdH45bMAAUUNXYT7UrNyWJ2oD_gXuLUU-NyuYxQM884,17459
|
16
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/main.py,sha256=kZCMPPzP4BGzEege9pdhQTJPKKVjCaC6kZdMMeaqP2M,2599
|
19
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
kreuzberg/_extractors/_base.py,sha256=ECEwBpxnIy_J9kGZGuqsaPCgLFfxRn7kn4hIf11gDJ8,4478
|
21
|
-
kreuzberg/_extractors/_email.py,sha256=6-Mk1TRXPyy9ylWKCpgdrogyzhiFnJOTuTRld1ghO8I,5695
|
22
|
-
kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
|
23
|
-
kreuzberg/_extractors/_image.py,sha256=0kzOQTTeJacaA8I9833fFvVQSz6FtUe9Nuw1oy0ToD0,4939
|
24
|
-
kreuzberg/_extractors/_pandoc.py,sha256=oQ4DgQSPoX1LXjGAKh_A40JHqiKWb91LeRBYSS_6EUA,26750
|
25
|
-
kreuzberg/_extractors/_pdf.py,sha256=giYG3aEdmsxT0tGWKBaMzHDPz74-jVmK4HZARDEBhsM,17108
|
26
|
-
kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
|
27
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=Nvyz7XT7C2ai4QeUashBeENQpuP5rs8SmKfumxEqlCg,13712
|
28
|
-
kreuzberg/_extractors/_structured.py,sha256=i3jAvhHZt_BsRGgZZfgcsUqlwAg_RNc8vsuecb04T0c,5581
|
29
|
-
kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
|
30
|
-
kreuzberg/_mcp/server.py,sha256=BQHeKI89aKf24BIE4n6m8r1rVA1Zgt6vM8Ki_OHuGnc,6780
|
31
|
-
kreuzberg/_multiprocessing/__init__.py,sha256=X2BtgKmWhF1rl0JYg2gvoSUaozKExfsWh-RRNvzNoOs,202
|
32
|
-
kreuzberg/_multiprocessing/gmft_isolated.py,sha256=ZfbhiL5bhBEJnibUSls3WV-FECrnU9VvKfq5O2foHcc,11191
|
33
|
-
kreuzberg/_ocr/__init__.py,sha256=CC9Ob1t_ltTYUamK1ZtmkswfCYdn1B-Z0kPemsQU0xU,1439
|
34
|
-
kreuzberg/_ocr/_base.py,sha256=lNT0Tin4hzbmaamqqySxvYEwNtrJB5gGlStrANQQcyc,1637
|
35
|
-
kreuzberg/_ocr/_easyocr.py,sha256=90Dv1xaLXbpG7EtmRQE5ykvnhqZJR3xSFXlxFMCSVSI,13740
|
36
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=UvugDdZd7RojHUiFeBaI8aqz36ecegPLj2v6oT6c42g,13776
|
37
|
-
kreuzberg/_ocr/_pool.py,sha256=Yb0l_GxnPsIWn3NA2FuBYEC8ipIqgwaYglUt0ltqSvk,10948
|
38
|
-
kreuzberg/_ocr/_sync.py,sha256=cdLiH9hYqygzqW3LkibhrE6C8atin7mfTv_k3JJFE0k,18287
|
39
|
-
kreuzberg/_ocr/_tesseract.py,sha256=KtenEIGL63gRhdH2hxOEVM89locAETGo2bNjQMXjTwY,13266
|
40
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
41
|
-
kreuzberg/_utils/_cache.py,sha256=CtpSmEggWoIPDZ9_Nl0i5pr7wtPyci8EVT-ajYsARGI,13609
|
42
|
-
kreuzberg/_utils/_device.py,sha256=rnaSSB5ibf2wr7EDxrcmOUZ4Ocor0pHkwb3N1pC46EY,10276
|
43
|
-
kreuzberg/_utils/_document_cache.py,sha256=z8irioKsOu8xve1YgHatm__wIFvs9I1gDK3tLNsNyqM,6926
|
44
|
-
kreuzberg/_utils/_errors.py,sha256=AV3oaRQDgJxe1YUZd9pCQUysUv9KW8Ib37MvnyFOZ4o,6386
|
45
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
|
46
|
-
kreuzberg/_utils/_process_pool.py,sha256=E3bHOO67TeoLUBjtw5HoY9gyFl621VaImYI-_itQ96c,8653
|
47
|
-
kreuzberg/_utils/_quality.py,sha256=dgFLt40NSqB8Ciej5QcZQLiV4U7LcrGux0vXckiE31U,7568
|
48
|
-
kreuzberg/_utils/_serialization.py,sha256=Rt5zSkvzf1SVNDrI6F2Zvnkel24mQkD1QvP0WjgZUgk,2195
|
49
|
-
kreuzberg/_utils/_string.py,sha256=5YKu9EZlZQ-LkphXUq8fdwKQrX9jWACFEhMGfjIysf4,6381
|
50
|
-
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
51
|
-
kreuzberg/_utils/_table.py,sha256=C2skLtcyczxDEH33Qw2dOwnR15SGillvNEP-NzBG3R8,8156
|
52
|
-
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
53
|
-
kreuzberg-3.8.0.dist-info/METADATA,sha256=d1N7v0EvJA-22g071Dctler5zF11WlKGTgLjGpsV8iw,11422
|
54
|
-
kreuzberg-3.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
55
|
-
kreuzberg-3.8.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
56
|
-
kreuzberg-3.8.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
57
|
-
kreuzberg-3.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|