kreuzberg 3.1.6__tar.gz → 3.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/PKG-INFO +11 -10
  2. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/README.md +1 -0
  3. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg.egg-info/PKG-INFO +11 -10
  4. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg.egg-info/requires.txt +9 -9
  5. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/pyproject.toml +13 -13
  6. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/LICENSE +0 -0
  7. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/__init__.py +0 -0
  8. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_chunker.py +0 -0
  9. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_constants.py +0 -0
  10. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/__init__.py +0 -0
  11. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_base.py +0 -0
  12. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_html.py +0 -0
  13. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_image.py +0 -0
  14. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_pandoc.py +0 -0
  15. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_pdf.py +0 -0
  16. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_presentation.py +0 -0
  17. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_extractors/_spread_sheet.py +0 -0
  18. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_gmft.py +0 -0
  19. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_mime_types.py +0 -0
  20. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_ocr/__init__.py +0 -0
  21. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_ocr/_base.py +0 -0
  22. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_ocr/_easyocr.py +0 -0
  23. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_ocr/_paddleocr.py +0 -0
  24. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_ocr/_tesseract.py +0 -0
  25. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_playa.py +0 -0
  26. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_registry.py +0 -0
  27. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_types.py +0 -0
  28. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_utils/__init__.py +0 -0
  29. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_utils/_string.py +0 -0
  30. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_utils/_sync.py +0 -0
  31. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/_utils/_tmp.py +0 -0
  32. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/exceptions.py +0 -0
  33. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/extraction.py +0 -0
  34. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg/py.typed +0 -0
  35. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg.egg-info/SOURCES.txt +0 -0
  36. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg.egg-info/dependency_links.txt +0 -0
  37. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/kreuzberg.egg-info/top_level.txt +0 -0
  38. {kreuzberg-3.1.6 → kreuzberg-3.1.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.1.6
3
+ Version: 3.1.7
4
4
  Summary: A text extraction library supporting PDFs, images, office documents and more
5
5
  Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
6
6
  License: MIT
@@ -27,33 +27,34 @@ License-File: LICENSE
27
27
  Requires-Dist: anyio>=4.9.0
28
28
  Requires-Dist: charset-normalizer>=3.4.2
29
29
  Requires-Dist: exceptiongroup>=1.2.2; python_version < "3.11"
30
- Requires-Dist: html-to-markdown>=1.3.2
31
- Requires-Dist: playa-pdf==0.4.3
30
+ Requires-Dist: html-to-markdown>=1.3.3
31
+ Requires-Dist: playa-pdf>=0.5.1
32
32
  Requires-Dist: pypdfium2==4.30.0
33
33
  Requires-Dist: python-calamine>=0.3.2
34
34
  Requires-Dist: python-pptx>=1.0.2
35
- Requires-Dist: typing-extensions>=4.13.2; python_version < "3.12"
35
+ Requires-Dist: typing-extensions>=4.14.0; python_version < "3.12"
36
36
  Provides-Extra: all
37
37
  Requires-Dist: easyocr>=1.7.2; extra == "all"
38
38
  Requires-Dist: gmft>=0.4.1; extra == "all"
39
- Requires-Dist: paddleocr>=2.10.0; extra == "all"
39
+ Requires-Dist: paddleocr>=3.0.1; extra == "all"
40
40
  Requires-Dist: paddlepaddle>=3.0.0; extra == "all"
41
- Requires-Dist: semantic-text-splitter>=0.26.0; extra == "all"
42
- Requires-Dist: setuptools>=80.4.0; extra == "all"
41
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == "all"
42
+ Requires-Dist: setuptools>=80.9.0; extra == "all"
43
43
  Provides-Extra: chunking
44
- Requires-Dist: semantic-text-splitter>=0.26.0; extra == "chunking"
44
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == "chunking"
45
45
  Provides-Extra: easyocr
46
46
  Requires-Dist: easyocr>=1.7.2; extra == "easyocr"
47
47
  Provides-Extra: gmft
48
48
  Requires-Dist: gmft>=0.4.1; extra == "gmft"
49
49
  Provides-Extra: paddleocr
50
- Requires-Dist: paddleocr>=2.10.0; extra == "paddleocr"
50
+ Requires-Dist: paddleocr>=3.0.1; extra == "paddleocr"
51
51
  Requires-Dist: paddlepaddle>=3.0.0; extra == "paddleocr"
52
- Requires-Dist: setuptools>=80.4.0; extra == "paddleocr"
52
+ Requires-Dist: setuptools>=80.9.0; extra == "paddleocr"
53
53
  Dynamic: license-file
54
54
 
55
55
  # Kreuzberg
56
56
 
57
+ [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
57
58
  [![PyPI version](https://badge.fury.io/py/kreuzberg.svg)](https://badge.fury.io/py/kreuzberg)
58
59
  [![Documentation](https://img.shields.io/badge/docs-GitHub_Pages-blue)](https://goldziher.github.io/kreuzberg/)
59
60
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -1,5 +1,6 @@
1
1
  # Kreuzberg
2
2
 
3
+ [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
3
4
  [![PyPI version](https://badge.fury.io/py/kreuzberg.svg)](https://badge.fury.io/py/kreuzberg)
4
5
  [![Documentation](https://img.shields.io/badge/docs-GitHub_Pages-blue)](https://goldziher.github.io/kreuzberg/)
5
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.1.6
3
+ Version: 3.1.7
4
4
  Summary: A text extraction library supporting PDFs, images, office documents and more
5
5
  Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
6
6
  License: MIT
@@ -27,33 +27,34 @@ License-File: LICENSE
27
27
  Requires-Dist: anyio>=4.9.0
28
28
  Requires-Dist: charset-normalizer>=3.4.2
29
29
  Requires-Dist: exceptiongroup>=1.2.2; python_version < "3.11"
30
- Requires-Dist: html-to-markdown>=1.3.2
31
- Requires-Dist: playa-pdf==0.4.3
30
+ Requires-Dist: html-to-markdown>=1.3.3
31
+ Requires-Dist: playa-pdf>=0.5.1
32
32
  Requires-Dist: pypdfium2==4.30.0
33
33
  Requires-Dist: python-calamine>=0.3.2
34
34
  Requires-Dist: python-pptx>=1.0.2
35
- Requires-Dist: typing-extensions>=4.13.2; python_version < "3.12"
35
+ Requires-Dist: typing-extensions>=4.14.0; python_version < "3.12"
36
36
  Provides-Extra: all
37
37
  Requires-Dist: easyocr>=1.7.2; extra == "all"
38
38
  Requires-Dist: gmft>=0.4.1; extra == "all"
39
- Requires-Dist: paddleocr>=2.10.0; extra == "all"
39
+ Requires-Dist: paddleocr>=3.0.1; extra == "all"
40
40
  Requires-Dist: paddlepaddle>=3.0.0; extra == "all"
41
- Requires-Dist: semantic-text-splitter>=0.26.0; extra == "all"
42
- Requires-Dist: setuptools>=80.4.0; extra == "all"
41
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == "all"
42
+ Requires-Dist: setuptools>=80.9.0; extra == "all"
43
43
  Provides-Extra: chunking
44
- Requires-Dist: semantic-text-splitter>=0.26.0; extra == "chunking"
44
+ Requires-Dist: semantic-text-splitter>=0.27.0; extra == "chunking"
45
45
  Provides-Extra: easyocr
46
46
  Requires-Dist: easyocr>=1.7.2; extra == "easyocr"
47
47
  Provides-Extra: gmft
48
48
  Requires-Dist: gmft>=0.4.1; extra == "gmft"
49
49
  Provides-Extra: paddleocr
50
- Requires-Dist: paddleocr>=2.10.0; extra == "paddleocr"
50
+ Requires-Dist: paddleocr>=3.0.1; extra == "paddleocr"
51
51
  Requires-Dist: paddlepaddle>=3.0.0; extra == "paddleocr"
52
- Requires-Dist: setuptools>=80.4.0; extra == "paddleocr"
52
+ Requires-Dist: setuptools>=80.9.0; extra == "paddleocr"
53
53
  Dynamic: license-file
54
54
 
55
55
  # Kreuzberg
56
56
 
57
+ [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
57
58
  [![PyPI version](https://badge.fury.io/py/kreuzberg.svg)](https://badge.fury.io/py/kreuzberg)
58
59
  [![Documentation](https://img.shields.io/badge/docs-GitHub_Pages-blue)](https://goldziher.github.io/kreuzberg/)
59
60
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -1,7 +1,7 @@
1
1
  anyio>=4.9.0
2
2
  charset-normalizer>=3.4.2
3
- html-to-markdown>=1.3.2
4
- playa-pdf==0.4.3
3
+ html-to-markdown>=1.3.3
4
+ playa-pdf>=0.5.1
5
5
  pypdfium2==4.30.0
6
6
  python-calamine>=0.3.2
7
7
  python-pptx>=1.0.2
@@ -10,18 +10,18 @@ python-pptx>=1.0.2
10
10
  exceptiongroup>=1.2.2
11
11
 
12
12
  [:python_version < "3.12"]
13
- typing-extensions>=4.13.2
13
+ typing-extensions>=4.14.0
14
14
 
15
15
  [all]
16
16
  easyocr>=1.7.2
17
17
  gmft>=0.4.1
18
- paddleocr>=2.10.0
18
+ paddleocr>=3.0.1
19
19
  paddlepaddle>=3.0.0
20
- semantic-text-splitter>=0.26.0
21
- setuptools>=80.4.0
20
+ semantic-text-splitter>=0.27.0
21
+ setuptools>=80.9.0
22
22
 
23
23
  [chunking]
24
- semantic-text-splitter>=0.26.0
24
+ semantic-text-splitter>=0.27.0
25
25
 
26
26
  [easyocr]
27
27
  easyocr>=1.7.2
@@ -30,6 +30,6 @@ easyocr>=1.7.2
30
30
  gmft>=0.4.1
31
31
 
32
32
  [paddleocr]
33
- paddleocr>=2.10.0
33
+ paddleocr>=3.0.1
34
34
  paddlepaddle>=3.0.0
35
- setuptools>=80.4.0
35
+ setuptools>=80.9.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "kreuzberg"
3
- version = "3.1.6"
3
+ version = "3.1.7"
4
4
  description = "A text extraction library supporting PDFs, images, office documents and more"
5
5
  readme = "README.md"
6
6
  keywords = [
@@ -40,12 +40,12 @@ dependencies = [
40
40
  "anyio>=4.9.0",
41
41
  "charset-normalizer>=3.4.2",
42
42
  "exceptiongroup>=1.2.2; python_version<'3.11'",
43
- "html-to-markdown>=1.3.2",
44
- "playa-pdf==0.4.3", # pinned due to breaking changes in 0.5.0
43
+ "html-to-markdown>=1.3.3",
44
+ "playa-pdf>=0.5.1", # lower bound 0.5.1 (previously pinned to 0.4.3 due to breaking changes in 0.5.0)
45
45
  "pypdfium2==4.30.0", # pinned due to bug in 4.30.1, until v5 is stable
46
46
  "python-calamine>=0.3.2",
47
47
  "python-pptx>=1.0.2",
48
- "typing-extensions>=4.13.2; python_version<'3.12'",
48
+ "typing-extensions>=4.14.0; python_version<'3.12'",
49
49
  ]
50
50
 
51
51
  optional-dependencies.all = [
@@ -54,14 +54,14 @@ optional-dependencies.all = [
54
54
  # gmft
55
55
  "gmft>=0.4.1",
56
56
  # paddle
57
- "paddleocr>=2.10.0",
57
+ "paddleocr>=3.0.1",
58
58
  "paddlepaddle>=3.0.0",
59
59
  # chunking
60
- "semantic-text-splitter>=0.26.0",
61
- "setuptools>=80.4.0",
60
+ "semantic-text-splitter>=0.27.0",
61
+ "setuptools>=80.9.0",
62
62
  ]
63
63
  optional-dependencies.chunking = [
64
- "semantic-text-splitter>=0.26.0",
64
+ "semantic-text-splitter>=0.27.0",
65
65
  ]
66
66
  optional-dependencies.easyocr = [
67
67
  "easyocr>=1.7.2",
@@ -70,22 +70,22 @@ optional-dependencies.gmft = [
70
70
  "gmft>=0.4.1",
71
71
  ]
72
72
  optional-dependencies.paddleocr = [
73
- "paddleocr>=2.10.0",
73
+ "paddleocr>=3.0.1",
74
74
  "paddlepaddle>=3.0.0",
75
- "setuptools>=80.4.0",
75
+ "setuptools>=80.9.0",
76
76
  ]
77
77
  urls.homepage = "https://github.com/Goldziher/kreuzberg"
78
78
 
79
79
  [dependency-groups]
80
80
  dev = [
81
81
  "covdefaults>=2.3.0",
82
- "mypy>=1.15.0",
82
+ "mypy>=1.16.0",
83
83
  "pre-commit>=4.2.0",
84
- "pytest>=8.3.5",
84
+ "pytest>=8.4.0",
85
85
  "pytest-cov>=6.1.1",
86
86
  "pytest-mock>=3.14.0",
87
87
  "pytest-timeout>=2.4.0",
88
- "ruff>=0.11.9",
88
+ "ruff>=0.11.13",
89
89
  "trio>=0.30.0",
90
90
  "uv-bump",
91
91
  ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes