kreuzberg 3.9.0__py3-none-any.whl → 3.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kreuzberg-3.9.0.dist-info → kreuzberg-3.9.1.dist-info}/METADATA +11 -11
- {kreuzberg-3.9.0.dist-info → kreuzberg-3.9.1.dist-info}/RECORD +5 -5
- {kreuzberg-3.9.0.dist-info → kreuzberg-3.9.1.dist-info}/WHEEL +0 -0
- {kreuzberg-3.9.0.dist-info → kreuzberg-3.9.1.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.9.0.dist-info → kreuzberg-3.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.9.0
|
3
|
+
Version: 3.9.1
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -29,12 +29,12 @@ Classifier: Topic :: Text Processing :: General
|
|
29
29
|
Classifier: Typing :: Typed
|
30
30
|
Requires-Python: >=3.10
|
31
31
|
Requires-Dist: anyio>=4.9.0
|
32
|
-
Requires-Dist: chardetng-py>=0.3.
|
32
|
+
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
|
-
Requires-Dist: html-to-markdown[lxml]>=1.
|
35
|
-
Requires-Dist: mcp>=1.
|
34
|
+
Requires-Dist: html-to-markdown[lxml]>=1.9.0
|
35
|
+
Requires-Dist: mcp>=1.12.2
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
|
-
Requires-Dist: playa-pdf>=0.6.
|
37
|
+
Requires-Dist: playa-pdf>=0.6.4
|
38
38
|
Requires-Dist: psutil>=7.0.0
|
39
39
|
Requires-Dist: pypdfium2==4.30.0
|
40
40
|
Requires-Dist: python-calamine>=0.3.2
|
@@ -53,7 +53,7 @@ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all
|
|
53
53
|
Requires-Dist: mailparse>=1.0.15; extra == 'all'
|
54
54
|
Requires-Dist: paddleocr>=3.1.0; extra == 'all'
|
55
55
|
Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
|
56
|
-
Requires-Dist: rich>=14.
|
56
|
+
Requires-Dist: rich>=14.1.0; extra == 'all'
|
57
57
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
|
58
58
|
Requires-Dist: setuptools>=80.9.0; extra == 'all'
|
59
59
|
Requires-Dist: spacy>=3.8.7; extra == 'all'
|
@@ -67,7 +67,7 @@ Provides-Extra: chunking
|
|
67
67
|
Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
|
68
68
|
Provides-Extra: cli
|
69
69
|
Requires-Dist: click>=8.2.1; extra == 'cli'
|
70
|
-
Requires-Dist: rich>=14.
|
70
|
+
Requires-Dist: rich>=14.1.0; extra == 'cli'
|
71
71
|
Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
|
72
72
|
Provides-Extra: easyocr
|
73
73
|
Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
|
@@ -130,14 +130,14 @@ Kreuzberg leverages established open source technologies:
|
|
130
130
|
### Extract Text with CLI
|
131
131
|
|
132
132
|
```bash
|
133
|
-
# Extract text from any file to
|
134
|
-
uvx kreuzberg extract document.pdf > output.
|
133
|
+
# Extract text from any file to text format
|
134
|
+
uvx kreuzberg extract document.pdf > output.txt
|
135
135
|
|
136
136
|
# With all features (OCR, table extraction, etc.)
|
137
|
-
uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr --format
|
137
|
+
uvx --from "kreuzberg[all]" kreuzberg extract invoice.pdf --ocr-backend tesseract --output-format text
|
138
138
|
|
139
139
|
# Extract with rich metadata
|
140
|
-
uvx kreuzberg extract report.pdf --show-metadata --format json
|
140
|
+
uvx kreuzberg extract report.pdf --show-metadata --output-format json
|
141
141
|
```
|
142
142
|
|
143
143
|
### Python Usage
|
@@ -47,8 +47,8 @@ kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6
|
|
47
47
|
kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
|
48
48
|
kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
|
49
49
|
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
50
|
-
kreuzberg-3.9.
|
51
|
-
kreuzberg-3.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
-
kreuzberg-3.9.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
-
kreuzberg-3.9.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
-
kreuzberg-3.9.0.dist-info/RECORD,,
|
50
|
+
kreuzberg-3.9.1.dist-info/METADATA,sha256=rBzP4yLvNuodmSrOUNXeYnUZCEPocULKhSjykSlPBeU,11908
|
51
|
+
kreuzberg-3.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
+
kreuzberg-3.9.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
53
|
+
kreuzberg-3.9.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
+
kreuzberg-3.9.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|