chunkr-ai 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.3.2/src/chunkr_ai.egg-info → chunkr_ai-0.3.4}/PKG-INFO +3 -1
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/pyproject.toml +7 -8
- chunkr_ai-0.3.4/src/chunkr_ai/__init__.py +14 -0
- chunkr_ai-0.3.4/src/chunkr_ai/api/auth.py +47 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/configuration.py +1 -1
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4/src/chunkr_ai.egg-info}/PKG-INFO +3 -1
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai.egg-info/requires.txt +3 -0
- chunkr_ai-0.3.2/src/chunkr_ai/__init__.py +0 -3
- chunkr_ai-0.3.2/src/chunkr_ai/api/auth.py +0 -13
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/LICENSE +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/README.md +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/setup.cfg +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/chunkr_base.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/decorators.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/misc.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/api/task_response.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai/models.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/src/chunkr_ai.egg-info/top_level.txt +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/tests/test_chunkr.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/tests/test_excel.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/tests/test_file_handling.py +0 -0
- {chunkr_ai-0.3.2 → chunkr_ai-0.3.4}/tests/test_pages.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.3.2
|
3
|
+
Version: 0.3.4
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
License: MIT License
|
@@ -25,6 +25,7 @@ License: MIT License
|
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
26
|
SOFTWARE.
|
27
27
|
Project-URL: Homepage, https://chunkr.ai
|
28
|
+
Requires-Python: >=3.10
|
28
29
|
Description-Content-Type: text/markdown
|
29
30
|
License-File: LICENSE
|
30
31
|
Requires-Dist: httpx>=0.25.0
|
@@ -33,6 +34,7 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
33
34
|
Requires-Dist: pillow>=10.0.0
|
34
35
|
Requires-Dist: pydantic>=2.0.0
|
35
36
|
Requires-Dist: python-dotenv>=0.19.0
|
37
|
+
Requires-Dist: tomli>=1.2.0; python_version < "3.11"
|
36
38
|
Provides-Extra: test
|
37
39
|
Requires-Dist: pytest>=7.0.0; extra == "test"
|
38
40
|
Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
|
@@ -4,12 +4,13 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.3.2"
|
8
|
-
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
7
|
+
version = "0.3.4"
|
8
|
+
authors = [{ "name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh" }]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
11
|
-
license = {"file" = "LICENSE"}
|
12
|
-
urls = {Homepage = "https://chunkr.ai"}
|
11
|
+
license = { "file" = "LICENSE" }
|
12
|
+
urls = { Homepage = "https://chunkr.ai" }
|
13
|
+
requires-python = ">=3.10"
|
13
14
|
dependencies = [
|
14
15
|
"httpx>=0.25.0",
|
15
16
|
"matplotlib>=3.10.3",
|
@@ -17,6 +18,7 @@ dependencies = [
|
|
17
18
|
"pillow>=10.0.0",
|
18
19
|
"pydantic>=2.0.0",
|
19
20
|
"python-dotenv>=0.19.0",
|
21
|
+
"tomli>=1.2.0; python_version<'3.11'",
|
20
22
|
]
|
21
23
|
|
22
24
|
[project.optional-dependencies]
|
@@ -28,7 +30,4 @@ test = [
|
|
28
30
|
]
|
29
31
|
|
30
32
|
[dependency-groups]
|
31
|
-
dev = [
|
32
|
-
"mypy>=1.17.1",
|
33
|
-
]
|
34
|
-
|
33
|
+
dev = ["mypy>=1.17.1"]
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from .api.chunkr import Chunkr
|
2
|
+
import tomllib
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
# Read version from pyproject.toml
|
6
|
+
try:
|
7
|
+
pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
|
8
|
+
with open(pyproject_path, "rb") as f:
|
9
|
+
pyproject_data = tomllib.load(f)
|
10
|
+
__version__ = pyproject_data["project"]["version"]
|
11
|
+
except Exception:
|
12
|
+
__version__ = "unknown"
|
13
|
+
|
14
|
+
__all__ = ["Chunkr", "__version__"]
|
@@ -0,0 +1,47 @@
|
|
1
|
+
import platform
|
2
|
+
import sys
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
# Handle tomllib import for Python 3.10 compatibility
|
6
|
+
try:
|
7
|
+
import tomllib
|
8
|
+
except ImportError:
|
9
|
+
import tomli as tomllib
|
10
|
+
|
11
|
+
def _find_pyproject_toml(start_path: Path) -> Path | None:
|
12
|
+
"""Search for pyproject.toml in current and parent directories."""
|
13
|
+
for parent in [start_path, *start_path.parents]:
|
14
|
+
candidate = parent / "pyproject.toml"
|
15
|
+
if candidate.is_file():
|
16
|
+
return candidate
|
17
|
+
return None
|
18
|
+
|
19
|
+
# Read version from pyproject.toml
|
20
|
+
try:
|
21
|
+
pyproject_path = _find_pyproject_toml(Path(__file__).resolve().parent)
|
22
|
+
if pyproject_path is not None:
|
23
|
+
with open(pyproject_path, "rb") as f:
|
24
|
+
pyproject_data = tomllib.load(f)
|
25
|
+
__version__ = pyproject_data["project"]["version"]
|
26
|
+
else:
|
27
|
+
__version__ = "unknown"
|
28
|
+
except Exception:
|
29
|
+
__version__ = "unknown"
|
30
|
+
|
31
|
+
class HeadersMixin:
|
32
|
+
"""Mixin class for handling authorization headers"""
|
33
|
+
_api_key: str = ""
|
34
|
+
|
35
|
+
def get_api_key(self) -> str:
|
36
|
+
"""Get the API key"""
|
37
|
+
if not hasattr(self, "_api_key") or not self._api_key:
|
38
|
+
raise ValueError("API key not set")
|
39
|
+
return self._api_key
|
40
|
+
|
41
|
+
def _headers(self) -> dict:
|
42
|
+
"""Generate authorization headers and version information"""
|
43
|
+
user_agent = f"chunkr-ai/{__version__} (Python/{sys.version.split()[0]}; {platform.system()}/{platform.release()})"
|
44
|
+
return {
|
45
|
+
"Authorization": self.get_api_key(),
|
46
|
+
"User-Agent": user_agent
|
47
|
+
}
|
@@ -85,7 +85,7 @@ class TokenizerType(BaseModel):
|
|
85
85
|
return {}
|
86
86
|
|
87
87
|
class ChunkProcessing(BaseModel):
|
88
|
-
ignore_headers_and_footers: Optional[bool] =
|
88
|
+
ignore_headers_and_footers: Optional[bool] = None # Deprecated
|
89
89
|
target_length: Optional[int] = None
|
90
90
|
tokenizer: Optional[Union[TokenizerType, Tokenizer, str]] = None
|
91
91
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.3.2
|
3
|
+
Version: 0.3.4
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
License: MIT License
|
@@ -25,6 +25,7 @@ License: MIT License
|
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
26
|
SOFTWARE.
|
27
27
|
Project-URL: Homepage, https://chunkr.ai
|
28
|
+
Requires-Python: >=3.10
|
28
29
|
Description-Content-Type: text/markdown
|
29
30
|
License-File: LICENSE
|
30
31
|
Requires-Dist: httpx>=0.25.0
|
@@ -33,6 +34,7 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
33
34
|
Requires-Dist: pillow>=10.0.0
|
34
35
|
Requires-Dist: pydantic>=2.0.0
|
35
36
|
Requires-Dist: python-dotenv>=0.19.0
|
37
|
+
Requires-Dist: tomli>=1.2.0; python_version < "3.11"
|
36
38
|
Provides-Extra: test
|
37
39
|
Requires-Dist: pytest>=7.0.0; extra == "test"
|
38
40
|
Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
|
@@ -1,13 +0,0 @@
|
|
1
|
-
class HeadersMixin:
|
2
|
-
"""Mixin class for handling authorization headers"""
|
3
|
-
_api_key: str = ""
|
4
|
-
|
5
|
-
def get_api_key(self) -> str:
|
6
|
-
"""Get the API key"""
|
7
|
-
if not hasattr(self, "_api_key") or not self._api_key:
|
8
|
-
raise ValueError("API key not set")
|
9
|
-
return self._api_key
|
10
|
-
|
11
|
-
def _headers(self) -> dict:
|
12
|
-
"""Generate authorization headers"""
|
13
|
-
return {"Authorization": self.get_api_key()}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|