kreuzberg 3.11.4__py3-none-any.whl → 3.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/__init__.py +14 -13
- kreuzberg/__main__.py +0 -2
- kreuzberg/_api/main.py +119 -9
- kreuzberg/_config.py +248 -204
- kreuzberg/_document_classification.py +0 -8
- kreuzberg/_entity_extraction.py +1 -93
- kreuzberg/_extractors/_base.py +0 -5
- kreuzberg/_extractors/_email.py +1 -11
- kreuzberg/_extractors/_html.py +9 -12
- kreuzberg/_extractors/_image.py +1 -23
- kreuzberg/_extractors/_pandoc.py +10 -89
- kreuzberg/_extractors/_pdf.py +39 -92
- kreuzberg/_extractors/_presentation.py +0 -17
- kreuzberg/_extractors/_spread_sheet.py +13 -53
- kreuzberg/_extractors/_structured.py +1 -4
- kreuzberg/_gmft.py +14 -138
- kreuzberg/_language_detection.py +1 -22
- kreuzberg/_mcp/__init__.py +0 -2
- kreuzberg/_mcp/server.py +3 -10
- kreuzberg/_mime_types.py +1 -2
- kreuzberg/_ocr/_easyocr.py +21 -108
- kreuzberg/_ocr/_paddleocr.py +16 -94
- kreuzberg/_ocr/_table_extractor.py +260 -0
- kreuzberg/_ocr/_tesseract.py +906 -264
- kreuzberg/_playa.py +5 -4
- kreuzberg/_types.py +638 -40
- kreuzberg/_utils/_cache.py +88 -90
- kreuzberg/_utils/_device.py +0 -18
- kreuzberg/_utils/_document_cache.py +0 -2
- kreuzberg/_utils/_errors.py +0 -3
- kreuzberg/_utils/_pdf_lock.py +0 -2
- kreuzberg/_utils/_process_pool.py +19 -19
- kreuzberg/_utils/_quality.py +0 -43
- kreuzberg/_utils/_ref.py +48 -0
- kreuzberg/_utils/_serialization.py +0 -5
- kreuzberg/_utils/_string.py +9 -39
- kreuzberg/_utils/_sync.py +0 -1
- kreuzberg/_utils/_table.py +50 -57
- kreuzberg/cli.py +54 -74
- kreuzberg/extraction.py +39 -32
- {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/METADATA +17 -14
- kreuzberg-3.13.0.dist-info/RECORD +56 -0
- kreuzberg-3.11.4.dist-info/RECORD +0 -54
- {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/WHEEL +0 -0
- {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.11.4.dist-info → kreuzberg-3.13.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg/extraction.py
CHANGED
@@ -151,20 +151,22 @@ async def extract_file(
|
|
151
151
|
"""
|
152
152
|
cache = get_document_cache()
|
153
153
|
path = Path(file_path)
|
154
|
-
cached_result = cache.get(path, config)
|
155
|
-
if cached_result is not None:
|
156
|
-
return cached_result
|
157
154
|
|
158
|
-
if
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
# Try cache again after waiting for other process to complete # ~keep
|
163
|
-
cached_result = cache.get(path, config) # pragma: no cover
|
164
|
-
if cached_result is not None: # pragma: no cover
|
155
|
+
if config.use_cache:
|
156
|
+
cached_result = cache.get(path, config)
|
157
|
+
if cached_result is not None:
|
165
158
|
return cached_result
|
166
159
|
|
167
|
-
|
160
|
+
if cache.is_processing(path, config):
|
161
|
+
event = cache.mark_processing(path, config)
|
162
|
+
await anyio.to_thread.run_sync(event.wait) # pragma: no cover
|
163
|
+
|
164
|
+
# Try cache again after waiting for other process to complete # ~keep
|
165
|
+
cached_result = cache.get(path, config) # pragma: no cover
|
166
|
+
if cached_result is not None: # pragma: no cover
|
167
|
+
return cached_result
|
168
|
+
|
169
|
+
cache.mark_processing(path, config)
|
168
170
|
|
169
171
|
try:
|
170
172
|
if not path.exists():
|
@@ -183,11 +185,13 @@ async def extract_file(
|
|
183
185
|
|
184
186
|
result = await _validate_and_post_process_async(result=result, config=config, file_path=path)
|
185
187
|
|
186
|
-
|
188
|
+
if config.use_cache:
|
189
|
+
cache.set(path, config, result)
|
187
190
|
|
188
191
|
return result
|
189
192
|
finally:
|
190
|
-
|
193
|
+
if config.use_cache:
|
194
|
+
cache.mark_complete(path, config)
|
191
195
|
|
192
196
|
|
193
197
|
async def batch_extract_file(
|
@@ -224,7 +228,7 @@ async def batch_extract_file(
|
|
224
228
|
content=f"Error: {type(e).__name__}: {e!s}",
|
225
229
|
mime_type="text/plain",
|
226
230
|
metadata={ # type: ignore[typeddict-unknown-key]
|
227
|
-
"error":
|
231
|
+
"error": f"{type(e).__name__}: {e!s}",
|
228
232
|
"error_context": create_error_context(
|
229
233
|
operation="batch_extract_file",
|
230
234
|
file_path=path,
|
@@ -273,7 +277,7 @@ async def batch_extract_bytes(
|
|
273
277
|
content=f"Error: {type(e).__name__}: {e!s}",
|
274
278
|
mime_type="text/plain",
|
275
279
|
metadata={ # type: ignore[typeddict-unknown-key]
|
276
|
-
"error":
|
280
|
+
"error": f"{type(e).__name__}: {e!s}",
|
277
281
|
"error_context": create_error_context(
|
278
282
|
operation="batch_extract_bytes",
|
279
283
|
error=e,
|
@@ -336,20 +340,22 @@ def extract_file_sync(
|
|
336
340
|
"""
|
337
341
|
cache = get_document_cache()
|
338
342
|
path = Path(file_path)
|
339
|
-
cached_result = cache.get(path, config)
|
340
|
-
if cached_result is not None:
|
341
|
-
return cached_result
|
342
343
|
|
343
|
-
if
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
# Try cache again after waiting for other process to complete # ~keep
|
348
|
-
cached_result = cache.get(path, config) # pragma: no cover
|
349
|
-
if cached_result is not None: # pragma: no cover
|
344
|
+
if config.use_cache:
|
345
|
+
cached_result = cache.get(path, config)
|
346
|
+
if cached_result is not None:
|
350
347
|
return cached_result
|
351
348
|
|
352
|
-
|
349
|
+
if cache.is_processing(path, config):
|
350
|
+
event = cache.mark_processing(path, config)
|
351
|
+
event.wait() # pragma: no cover
|
352
|
+
|
353
|
+
# Try cache again after waiting for other process to complete # ~keep
|
354
|
+
cached_result = cache.get(path, config) # pragma: no cover
|
355
|
+
if cached_result is not None: # pragma: no cover
|
356
|
+
return cached_result
|
357
|
+
|
358
|
+
cache.mark_processing(path, config)
|
353
359
|
|
354
360
|
try:
|
355
361
|
if not path.exists():
|
@@ -360,7 +366,7 @@ def extract_file_sync(
|
|
360
366
|
result = extractor.extract_path_sync(Path(file_path))
|
361
367
|
else:
|
362
368
|
result = ExtractionResult(
|
363
|
-
content=Path(file_path).read_text(),
|
369
|
+
content=Path(file_path).read_text(encoding="utf-8"),
|
364
370
|
chunks=[],
|
365
371
|
mime_type=mime_type,
|
366
372
|
metadata={},
|
@@ -368,11 +374,13 @@ def extract_file_sync(
|
|
368
374
|
|
369
375
|
result = _validate_and_post_process_sync(result=result, config=config, file_path=path)
|
370
376
|
|
371
|
-
|
377
|
+
if config.use_cache:
|
378
|
+
cache.set(path, config, result)
|
372
379
|
|
373
380
|
return result
|
374
381
|
finally:
|
375
|
-
|
382
|
+
if config.use_cache:
|
383
|
+
cache.mark_complete(path, config)
|
376
384
|
|
377
385
|
|
378
386
|
def batch_extract_file_sync(
|
@@ -404,7 +412,7 @@ def batch_extract_file_sync(
|
|
404
412
|
content=f"Error: {type(e).__name__}: {e!s}",
|
405
413
|
mime_type="text/plain",
|
406
414
|
metadata={ # type: ignore[typeddict-unknown-key]
|
407
|
-
"error":
|
415
|
+
"error": f"{type(e).__name__}: {e!s}",
|
408
416
|
"error_context": create_error_context(
|
409
417
|
operation="batch_extract_file_sync",
|
410
418
|
file_path=file_path,
|
@@ -455,7 +463,7 @@ def batch_extract_bytes_sync(
|
|
455
463
|
content=f"Error: {type(e).__name__}: {e!s}",
|
456
464
|
mime_type="text/plain",
|
457
465
|
metadata={ # type: ignore[typeddict-unknown-key]
|
458
|
-
"error":
|
466
|
+
"error": f"{type(e).__name__}: {e!s}",
|
459
467
|
"error_context": create_error_context(
|
460
468
|
operation="batch_extract_bytes_sync",
|
461
469
|
error=e,
|
@@ -469,7 +477,6 @@ def batch_extract_bytes_sync(
|
|
469
477
|
return (index, error_result)
|
470
478
|
|
471
479
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
472
|
-
# Avoid creating intermediate list, use enumerate directly
|
473
480
|
future_to_index = {executor.submit(extract_single, (i, content)): i for i, content in enumerate(contents)}
|
474
481
|
|
475
482
|
results: list[ExtractionResult] = [None] * len(contents) # type: ignore[list-item]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.13.0
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -31,15 +31,16 @@ Requires-Python: >=3.10
|
|
31
31
|
Requires-Dist: anyio>=4.10.0
|
32
32
|
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
|
-
Requires-Dist: html-to-markdown[lxml]>=1.9.
|
34
|
+
Requires-Dist: html-to-markdown[lxml]>=1.9.1
|
35
35
|
Requires-Dist: mcp>=1.13.0
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
37
|
Requires-Dist: playa-pdf>=0.7.0
|
38
|
+
Requires-Dist: polars>=1.33.0
|
38
39
|
Requires-Dist: psutil>=7.0.0
|
39
40
|
Requires-Dist: pypdfium2==4.30.0
|
40
41
|
Requires-Dist: python-calamine>=0.3.2
|
41
42
|
Requires-Dist: python-pptx>=1.0.2
|
42
|
-
Requires-Dist: typing-extensions>=4.
|
43
|
+
Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
|
43
44
|
Provides-Extra: additional-extensions
|
44
45
|
Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
|
45
46
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'additional-extensions'
|
@@ -54,7 +55,6 @@ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all
|
|
54
55
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
55
56
|
Requires-Dist: paddleocr>=3.2.0; extra == 'all'
|
56
57
|
Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
|
57
|
-
Requires-Dist: pandas>=2.3.2; extra == 'all'
|
58
58
|
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
|
59
59
|
Requires-Dist: rich>=14.1.0; extra == 'all'
|
60
60
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
@@ -73,7 +73,6 @@ Provides-Extra: crypto
|
|
73
73
|
Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
|
74
74
|
Provides-Extra: document-classification
|
75
75
|
Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
|
76
|
-
Requires-Dist: pandas>=2.3.2; extra == 'document-classification'
|
77
76
|
Provides-Extra: easyocr
|
78
77
|
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
79
78
|
Provides-Extra: entity-extraction
|
@@ -109,8 +108,7 @@ Description-Content-Type: text/markdown
|
|
109
108
|
- **Text Extraction**: High-fidelity text extraction preserving document structure and formatting
|
110
109
|
- **Metadata Extraction**: Comprehensive metadata including author, creation date, language, and document properties
|
111
110
|
- **Format Support**: 18 document types including PDF, Microsoft Office, images, HTML, and structured data formats
|
112
|
-
- **OCR Integration**:
|
113
|
-
- **Table Detection**: Structured table extraction with cell-level precision via GMFT integration
|
111
|
+
- **OCR Integration**: Tesseract OCR with markdown output (default) and table extraction from scanned documents
|
114
112
|
- **Document Classification**: Automatic document type detection (contracts, forms, invoices, receipts, reports)
|
115
113
|
|
116
114
|
### Technical Architecture
|
@@ -138,8 +136,8 @@ Kreuzberg leverages established open source technologies:
|
|
138
136
|
# Extract text from any file to text format
|
139
137
|
uvx kreuzberg extract document.pdf > output.txt
|
140
138
|
|
141
|
-
# With all features (
|
142
|
-
uvx
|
139
|
+
# With all features (chunking, language detection, etc.)
|
140
|
+
uvx kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
|
143
141
|
|
144
142
|
# Extract with rich metadata
|
145
143
|
uvx kreuzberg extract report.pdf --show-metadata --output-format json
|
@@ -179,10 +177,15 @@ print(f"Keywords: {result.metadata.keywords}")
|
|
179
177
|
|
180
178
|
### Docker
|
181
179
|
|
180
|
+
Two optimized images available:
|
181
|
+
|
182
182
|
```bash
|
183
|
-
#
|
183
|
+
# Base image (API + CLI + multilingual OCR)
|
184
184
|
docker run -p 8000:8000 goldziher/kreuzberg
|
185
185
|
|
186
|
+
# Core image (+ chunking + crypto + document classification + language detection)
|
187
|
+
docker run -p 8000:8000 goldziher/kreuzberg-core:latest
|
188
|
+
|
186
189
|
# Extract via API
|
187
190
|
curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
|
188
191
|
```
|
@@ -196,7 +199,7 @@ curl -X POST -F "file=@document.pdf" http://localhost:8000/extract
|
|
196
199
|
**Add to Claude Desktop with one command:**
|
197
200
|
|
198
201
|
```bash
|
199
|
-
claude mcp add kreuzberg uvx
|
202
|
+
claude mcp add kreuzberg uvx kreuzberg-mcp
|
200
203
|
```
|
201
204
|
|
202
205
|
**Or configure manually in `claude_desktop_config.json`:**
|
@@ -206,7 +209,7 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
|
|
206
209
|
"mcpServers": {
|
207
210
|
"kreuzberg": {
|
208
211
|
"command": "uvx",
|
209
|
-
"args": ["
|
212
|
+
"args": ["kreuzberg-mcp"]
|
210
213
|
}
|
211
214
|
}
|
212
215
|
}
|
@@ -215,8 +218,8 @@ claude mcp add kreuzberg uvx -- --from "kreuzberg[all]" kreuzberg-mcp
|
|
215
218
|
**MCP capabilities:**
|
216
219
|
|
217
220
|
- Extract text from PDFs, images, Office docs, and more
|
218
|
-
-
|
219
|
-
-
|
221
|
+
- Multilingual OCR support with Tesseract
|
222
|
+
- Metadata parsing and language detection
|
220
223
|
|
221
224
|
📖 **[MCP Documentation](https://kreuzberg.dev/user-guide/mcp-server/)**
|
222
225
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
|
2
|
+
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
+
kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
|
4
|
+
kreuzberg/_config.py,sha256=dSTumnpleMeUjUabWgAH7WlhTkdNG3eeMv8FSFmUaEI,15776
|
5
|
+
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
+
kreuzberg/_document_classification.py,sha256=NZ-6tQtVa1OgigC7xf30hAsnL5_gi9ak9X2XYdsCfTI,6361
|
7
|
+
kreuzberg/_entity_extraction.py,sha256=QFIPQ_fovEnEezpS6W4pwpjTA2PqS7TUCD9AKf8sAyc,4666
|
8
|
+
kreuzberg/_gmft.py,sha256=60WpPTf7jocU-kmkBe-pBytl7l58aQzd-Aw2_Hlioug,21481
|
9
|
+
kreuzberg/_language_detection.py,sha256=yLUliJOUyofVma_q6FwzG9Ck4-XX3AEjxleTHrqi8R4,2445
|
10
|
+
kreuzberg/_mime_types.py,sha256=fwtPKtp2XhCLT686qF26PBMeOqcVJroKPwkp7JgaM0E,8462
|
11
|
+
kreuzberg/_playa.py,sha256=1viLRqgcDWvaPo5ZsDPO2gqHFSBApOYortTV_SPVK9k,12190
|
12
|
+
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
13
|
+
kreuzberg/_types.py,sha256=WFUFY1S7SL7kTfHCX-zGASLYT94FxLD71C9vGUzFOiA,38922
|
14
|
+
kreuzberg/cli.py,sha256=MLeWoMcLoN6WnkbyRbOY-2dqp-vNZf7Nb-K_R5F5CoU,12730
|
15
|
+
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
+
kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
|
17
|
+
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
|
20
|
+
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
kreuzberg/_extractors/_base.py,sha256=EZTEJzwJxwu_yYFQ5QlZVNQMPCcli7yyUB4T5mFotCY,4209
|
22
|
+
kreuzberg/_extractors/_email.py,sha256=mVi_VDmiFhe6NgiWxJDYt4DQiP6jVs5dP8BsPClm3WQ,6108
|
23
|
+
kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
|
24
|
+
kreuzberg/_extractors/_image.py,sha256=UZEOmKNAS4KjaX38iYq2Ux6Mta3juCF1MzWNeBxpPE8,3414
|
25
|
+
kreuzberg/_extractors/_pandoc.py,sha256=zumwImIXwD3ziPhYxt0EQct5sSMy5lQiY6KnPSDxBTU,24183
|
26
|
+
kreuzberg/_extractors/_pdf.py,sha256=766O7rXAeAJ42vPpWbGpW_WgHXm48eWwX09l3aqjKeM,18064
|
27
|
+
kreuzberg/_extractors/_presentation.py,sha256=BJdEM9jsuAd0vb-PIRwNMcRj4xVjItb5kpOpnjsCBi0,10175
|
28
|
+
kreuzberg/_extractors/_spread_sheet.py,sha256=wqAV-Stqfd4hXs5ock-chqBEdzv4voSgT1uFUO1cIU0,12075
|
29
|
+
kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
|
30
|
+
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
31
|
+
kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
|
32
|
+
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
|
+
kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
|
34
|
+
kreuzberg/_ocr/_easyocr.py,sha256=CtiHGx_BmuUwZhC7bScYF9mwnAxRrLWJ-X70fuwFTjk,14079
|
35
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=wCuIQ_yxPWE9hukiehYNRdt00Rb2h6pWdfqPS8hI2s0,14297
|
36
|
+
kreuzberg/_ocr/_table_extractor.py,sha256=MeQLQn_bRco5OAcUoy613ZbZLCDBRJY8uHH_bUBSP8I,7613
|
37
|
+
kreuzberg/_ocr/_tesseract.py,sha256=i_UTjOmrFxZbtmXxrQIsE78wtZLTyZph0i0jDQc4EMA,56916
|
38
|
+
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
|
+
kreuzberg/_utils/_cache.py,sha256=fDqFp_54-Kyn3_4VkXkhovvNIB2osKqXlS13MlORrU8,14539
|
40
|
+
kreuzberg/_utils/_device.py,sha256=35xQvrLSPISJlWicQGknoBjkwdalwVxiJbzyxwuwOVo,9747
|
41
|
+
kreuzberg/_utils/_document_cache.py,sha256=CpCdJVd8SYLjfwm0ozSM8mx5x8i9vVDet3BlEUpzuZY,6920
|
42
|
+
kreuzberg/_utils/_errors.py,sha256=ctD-s1q7vbEgqHQ3OVJiEOODDLTd2LvrM3z6o37zrGI,6395
|
43
|
+
kreuzberg/_utils/_pdf_lock.py,sha256=mHB1A4Fo_nSfgdqUNEWODH9b5tNFqpEHcNE6rT41dGE,1886
|
44
|
+
kreuzberg/_utils/_process_pool.py,sha256=ebuMPmHXPkWaLWjgAkeaONvAZo974PhfENN8pnPTCco,8415
|
45
|
+
kreuzberg/_utils/_quality.py,sha256=m3SIXGDY9pfRmh3XeKdZWT1vBz7issH0SfKsutEuRxw,5833
|
46
|
+
kreuzberg/_utils/_ref.py,sha256=uP_S3x0AQH2Nyjo1tYEj7N_u9hGzYVewdjch6a8Fv5I,1458
|
47
|
+
kreuzberg/_utils/_serialization.py,sha256=duKP5OuBvi-m6ljQOhoyuJU7sl2WPnov8yJDpYuDArw,2052
|
48
|
+
kreuzberg/_utils/_string.py,sha256=yrcwHHl23FxWrNoFXkmR3icgivfvbLRvkqQek8F3qqI,5020
|
49
|
+
kreuzberg/_utils/_sync.py,sha256=mc-K2y_sc6mG-HOswlHTXAWaEzgisEERvq9PPw2dAw4,4869
|
50
|
+
kreuzberg/_utils/_table.py,sha256=dYM_dWNHRCXcWOhSQBnahOJaBXyuQFyYX9arHrH4TF8,7555
|
51
|
+
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
52
|
+
kreuzberg-3.13.0.dist-info/METADATA,sha256=896BWDLD6ApGiOQFKXMqQezC4qgKRUxjMqbZVWxBoJ0,12098
|
53
|
+
kreuzberg-3.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
54
|
+
kreuzberg-3.13.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
55
|
+
kreuzberg-3.13.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
56
|
+
kreuzberg-3.13.0.dist-info/RECORD,,
|
@@ -1,54 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=0OJ_jNKbS6GxzWC5-EfRCiE80as_ya0-wwyNsTYbxzY,1721
|
2
|
-
kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
|
3
|
-
kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
|
4
|
-
kreuzberg/_config.py,sha256=Au521UiR7vcQs_8_hhoWIfmDDMJIrDM3XZUB_qHfCmo,14035
|
5
|
-
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_document_classification.py,sha256=qFGmwvUMhnNAvNNJO7E-huPx-Ps-_DWxdNxsozIzgaw,6870
|
7
|
-
kreuzberg/_entity_extraction.py,sha256=Oa1T-9mptimpOHtcda-GtrVYH9PFy7DSJj3thJZUD7k,7902
|
8
|
-
kreuzberg/_gmft.py,sha256=6P4gSSmU39puaYAKmdGr9ALf0USYTwRDuvvhG1LmI24,26441
|
9
|
-
kreuzberg/_language_detection.py,sha256=_Ng2aHgPxOHFgd507gVNiIGVmnxxbpgYwsO0bD0yTzg,3315
|
10
|
-
kreuzberg/_mime_types.py,sha256=2warRVqfBUNIg8JBg8yP4pRqaMPvwINosHMkJwtH_Fc,8488
|
11
|
-
kreuzberg/_playa.py,sha256=_IPrUSWwSfDQlWXOpKlauV0D9MhGrujGP5kmQ0U3L0g,12188
|
12
|
-
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
13
|
-
kreuzberg/_types.py,sha256=bMaU6VuoqwOpW6ufshA-DWpNw6t9EokjEDEfFsznvdo,15389
|
14
|
-
kreuzberg/cli.py,sha256=nG1CD_h50EWLmDbrb0_DffRl25uTCKeCS6_gRVpjEdU,12578
|
15
|
-
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
-
kreuzberg/extraction.py,sha256=Kt1mOxdlOb35yVOdpdhiRPuTgA9BW_TTG9qwCkSxSkc,17332
|
17
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
kreuzberg/_api/main.py,sha256=8VwxRlIXwnPs7ZYm0saUZsNOjevEAWJQpNreG-X7ZpE,3273
|
20
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
kreuzberg/_extractors/_base.py,sha256=H_nwynBX3fozncVjV13c329x5eCLl5r7nyVTLQyDAzI,4396
|
22
|
-
kreuzberg/_extractors/_email.py,sha256=Jpr4NFef640uVgNFkR1or-omy8RVt-NOHUYgWRDjyBo,6753
|
23
|
-
kreuzberg/_extractors/_html.py,sha256=lOM1Tgrrvd7vpEeFAxC1dp0Tibr6N2FEHCjgFx0FK64,1745
|
24
|
-
kreuzberg/_extractors/_image.py,sha256=Iz1JpvGqcYyh9g4zO_bMZG3E9S39KNHFu8PrXDRXeOk,4513
|
25
|
-
kreuzberg/_extractors/_pandoc.py,sha256=51k7XISfKaPorhapG7aIeQb94KGsfozxKyT2rwhk9Bk,26553
|
26
|
-
kreuzberg/_extractors/_pdf.py,sha256=OflyvwEkuFLmw8E3si35MCGH31fvd5o50VdMmu5QRVs,19884
|
27
|
-
kreuzberg/_extractors/_presentation.py,sha256=CUlqZl_QCdJdumsZh0BpROkFbvi9uq7yMoIt3bRTUeE,10859
|
28
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=iagiyJsnl-89OP1eqmEv8jWl7gZBJm2x0YOyqBgLasA,13733
|
29
|
-
kreuzberg/_extractors/_structured.py,sha256=PbNaXd-_PUPsE0yZkISod_vLBokbWdVTKEPpEmqaEMM,5787
|
30
|
-
kreuzberg/_mcp/__init__.py,sha256=8PYV-omC8Rln7Cove8C3rHu3d7sR1FuiwSBG1O7vkAE,92
|
31
|
-
kreuzberg/_mcp/server.py,sha256=Dxed80MqZsYCFyYo0QdArpKE4H8DhpKY34fijdzV5uw,8731
|
32
|
-
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
|
-
kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
|
34
|
-
kreuzberg/_ocr/_easyocr.py,sha256=eU4MA_B_-cvq_IhpCeYUruL_kqcfm8maNZKP7zvVQHI,17512
|
35
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=I7ns6L56a2Ol460Bge6e0hpc2AkkwDepLcpCsABj5Dc,17609
|
36
|
-
kreuzberg/_ocr/_tesseract.py,sha256=teLMH1pBhpcmEXDcyZlv56hYINLGMuaKZ0CQtcu_czQ,31510
|
37
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
-
kreuzberg/_utils/_cache.py,sha256=hYd_a5Ni5VJBE1XU_eN9gvQ5gg0FRsdbRgmJe-OIJHM,15253
|
39
|
-
kreuzberg/_utils/_device.py,sha256=JI9p9TGSfQHEi2SL-ovOXMr9RUnVq-RrEly89OvmQ5w,10485
|
40
|
-
kreuzberg/_utils/_document_cache.py,sha256=ka90JIT-FXUMOv8z2u3fztQgZZb2XQDHTMnBi32mySA,7005
|
41
|
-
kreuzberg/_utils/_errors.py,sha256=UsktQ_p7eOj9crPsFDg8HgRSE5-IpuFC7y1e6dDI_fY,6503
|
42
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
|
43
|
-
kreuzberg/_utils/_process_pool.py,sha256=4BqhmRspwMyPT2EBfTu_rrn7v722wlMLD8qlYvYsc00,8621
|
44
|
-
kreuzberg/_utils/_quality.py,sha256=-nKzj5n7yJDYrvl556oq2T5S5oKMEOrjpcRMlZ00Jqo,7668
|
45
|
-
kreuzberg/_utils/_serialization.py,sha256=cqqxqN2cmtndBhIr4v2wqiMwnNadnKhvuN7EUj3i18M,2290
|
46
|
-
kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6800
|
47
|
-
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
48
|
-
kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
|
49
|
-
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
50
|
-
kreuzberg-3.11.4.dist-info/METADATA,sha256=l3d8PyVfX_aEgXl5ykkuRHJi-8Qzhu4_KcHDYOK2RYg,12136
|
51
|
-
kreuzberg-3.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
-
kreuzberg-3.11.4.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
-
kreuzberg-3.11.4.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
-
kreuzberg-3.11.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|