chunkr-ai 0.0.21__tar.gz → 0.0.23__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (24)
  1. chunkr_ai-0.0.23/LICENSE +21 -0
  2. {chunkr_ai-0.0.21/src/chunkr_ai.egg-info → chunkr_ai-0.0.23}/PKG-INFO +23 -2
  3. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/pyproject.toml +2 -2
  4. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/chunkr.py +3 -3
  5. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/chunkr_base.py +1 -128
  6. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/decorators.py +3 -3
  7. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/misc.py +27 -6
  8. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/task_response.py +2 -4
  9. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/models.py +1 -0
  10. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23/src/chunkr_ai.egg-info}/PKG-INFO +23 -2
  11. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/tests/test_chunkr.py +2 -2
  12. chunkr_ai-0.0.21/LICENSE +0 -0
  13. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/README.md +0 -0
  14. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/setup.cfg +0 -0
  15. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/__init__.py +0 -0
  16. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/__init__.py +0 -0
  17. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/api.py +0 -0
  18. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/auth.py +0 -0
  19. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/config.py +0 -0
  20. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai/api/protocol.py +0 -0
  21. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  22. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  23. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/requires.txt +1 -1
  24. {chunkr_ai-0.0.21 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Lumina AI INC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,19 +1,40 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.21
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Lumina AI INC
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
6
27
  Project-URL: Homepage, https://chunkr.ai
7
28
  Description-Content-Type: text/markdown
8
29
  License-File: LICENSE
9
30
  Requires-Dist: httpx>=0.25.0
10
31
  Requires-Dist: pillow>=10.0.0
11
32
  Requires-Dist: pydantic>=2.0.0
12
- Requires-Dist: pytest-asyncio>=0.21.0
13
33
  Requires-Dist: python-dotenv>=0.19.0
14
34
  Provides-Extra: test
15
35
  Requires-Dist: pytest>=7.0.0; extra == "test"
16
36
  Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
37
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
17
38
 
18
39
  # Chunkr Python Client
19
40
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.21"
7
+ version = "0.0.23"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -14,7 +14,6 @@ dependencies = [
14
14
  "httpx>=0.25.0",
15
15
  "pillow>=10.0.0",
16
16
  "pydantic>=2.0.0",
17
- "pytest-asyncio>=0.21.0",
18
17
  "python-dotenv>=0.19.0",
19
18
  ]
20
19
 
@@ -22,5 +21,6 @@ dependencies = [
22
21
  test = [
23
22
  "pytest>=7.0.0",
24
23
  "pytest-xdist>=3.0.0",
24
+ "pytest-asyncio>=0.21.0",
25
25
  ]
26
26
 
@@ -34,7 +34,7 @@ class Chunkr(ChunkrBase):
34
34
  file: Union[str, Path, BinaryIO, Image.Image],
35
35
  config: Configuration = None,
36
36
  ) -> TaskResponse:
37
- files = prepare_upload_data(file, config)
37
+ files = await prepare_upload_data(file, config, self._client)
38
38
  r = await self._client.post(
39
39
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
40
  )
@@ -44,7 +44,7 @@ class Chunkr(ChunkrBase):
44
44
  @anywhere()
45
45
  @ensure_client()
46
46
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
- files = prepare_upload_data(None, config)
47
+ files = await prepare_upload_data(None, config, self._client)
48
48
  r = await self._client.patch(
49
49
  f"{self.url}/api/v1/task/{task_id}",
50
50
  files=files,
@@ -70,8 +70,8 @@ class Chunkr(ChunkrBase):
70
70
  )
71
71
  r.raise_for_status()
72
72
 
73
- @ensure_client()
74
73
  @anywhere()
74
+ @ensure_client()
75
75
  async def cancel_task(self, task_id: str) -> None:
76
76
  r = await self._client.get(
77
77
  f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
@@ -4,13 +4,10 @@ from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
6
  import httpx
7
- import io
8
- import json
9
7
  import os
10
8
  from pathlib import Path
11
9
  from PIL import Image
12
- import requests
13
- from typing import BinaryIO, Tuple, Union
10
+ from typing import BinaryIO, Union
14
11
 
15
12
 
16
13
  class ChunkrBase(HeadersMixin):
@@ -28,130 +25,6 @@ class ChunkrBase(HeadersMixin):
28
25
  self.url = self.url.rstrip("/")
29
26
  self._client = httpx.AsyncClient()
30
27
 
31
- def _prepare_file(
32
- self, file: Union[str, Path, BinaryIO, Image.Image]
33
- ) -> Tuple[str, BinaryIO]:
34
- """Convert various file types into a tuple of (filename, file-like object).
35
-
36
- Args:
37
- file: Input file, can be:
38
- - String or Path to a file
39
- - URL string starting with http:// or https://
40
- - Base64 string
41
- - Opened binary file (mode='rb')
42
- - PIL/Pillow Image object
43
-
44
- Returns:
45
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
46
-
47
- Raises:
48
- FileNotFoundError: If the file path doesn't exist
49
- TypeError: If the file type is not supported
50
- ValueError: If the URL is invalid or unreachable
51
- ValueError: If the MIME type is unsupported
52
- """
53
- # Handle URLs
54
- if isinstance(file, str) and (
55
- file.startswith("http://") or file.startswith("https://")
56
- ):
57
- response = requests.get(file)
58
- response.raise_for_status()
59
- file_obj = io.BytesIO(response.content)
60
- filename = Path(file.split("/")[-1]).name or "downloaded_file"
61
- return filename, file_obj
62
-
63
- # Handle base64 strings
64
- if isinstance(file, str) and "," in file and ";base64," in file:
65
- try:
66
- # Split header and data
67
- header, base64_data = file.split(",", 1)
68
- import base64
69
-
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = "bin"
75
- mime_type = header.split(":")[-1].split(";")[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- "application/pdf": "pdf",
80
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
81
- "application/msword": "doc",
82
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
83
- "application/vnd.ms-powerpoint": "ppt",
84
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
85
- "application/vnd.ms-excel": "xls",
86
- "image/jpeg": "jpg",
87
- "image/png": "png",
88
- "image/jpg": "jpg",
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, "rb")
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or "PNG"
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, "read") and hasattr(file, "seek"):
117
- # Try to get the filename from the file object if possible
118
- name = (
119
- getattr(file, "name", "document")
120
- if hasattr(file, "name")
121
- else "document"
122
- )
123
- return Path(name).name, file
124
-
125
- raise TypeError(f"Unsupported file type: {type(file)}")
126
-
127
- def _prepare_upload_data(
128
- self,
129
- file: Union[str, Path, BinaryIO, Image.Image],
130
- config: Configuration = None,
131
- ) -> Tuple[dict, dict]:
132
- """Prepare files and data dictionaries for upload.
133
-
134
- Args:
135
- file: The file to upload
136
- config: Optional configuration settings
137
-
138
- Returns:
139
- Tuple[dict, dict]: (files dict, data dict) ready for upload
140
- """
141
- filename, file_obj = self._prepare_file(file)
142
- files = {"file": (filename, file_obj)}
143
- data = {}
144
-
145
- if config:
146
- config_dict = config.model_dump(mode="json", exclude_none=True)
147
- for key, value in config_dict.items():
148
- if isinstance(value, dict):
149
- files[key] = (None, json.dumps(value), "application/json")
150
- else:
151
- data[key] = value
152
-
153
- return files, data
154
-
155
28
  @abstractmethod
156
29
  def upload(
157
30
  self,
@@ -22,12 +22,12 @@ def anywhere():
22
22
  def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
23
23
  global _sync_loop
24
24
  try:
25
- loop = asyncio.get_running_loop()
26
- return async_func(*args, **kwargs)
25
+ asyncio.get_running_loop()
26
+ return async_func(*args, **kwargs)
27
27
  except RuntimeError:
28
28
  if _sync_loop is None:
29
29
  _sync_loop = asyncio.new_event_loop()
30
- asyncio.set_event_loop(_sync_loop)
30
+ asyncio.set_event_loop(_sync_loop)
31
31
  try:
32
32
  return _sync_loop.run_until_complete(async_func(*args, **kwargs))
33
33
  finally:
@@ -3,16 +3,36 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- import requests
6
+ import httpx
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
8
 
9
- def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object)."""
9
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
+ """Convert various file types into a tuple of (filename, file-like object).
11
+
12
+ Args:
13
+ file: Input file, can be:
14
+ - String or Path to a file
15
+ - URL string starting with http:// or https://
16
+ - Base64 string
17
+ - Opened binary file (mode='rb')
18
+ - PIL/Pillow Image object
19
+
20
+ Returns:
21
+ Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
11
29
  # Handle URLs
12
30
  if isinstance(file, str) and (
13
31
  file.startswith("http://") or file.startswith("https://")
14
32
  ):
15
- response = requests.get(file)
33
+ if not client:
34
+ raise ValueError("Client must be provided to download files from URLs")
35
+ response = client.get(file)
16
36
  response.raise_for_status()
17
37
 
18
38
  # Try to get filename from Content-Disposition header first
@@ -108,9 +128,10 @@ def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, Bi
108
128
  raise TypeError(f"Unsupported file type: {type(file)}")
109
129
 
110
130
 
111
- def prepare_upload_data(
131
+ async def prepare_upload_data(
112
132
  file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
113
133
  config: Optional[Configuration] = None,
134
+ client: httpx.AsyncClient = None,
114
135
  ) -> dict:
115
136
  """Prepare files and data dictionaries for upload.
116
137
 
@@ -123,7 +144,7 @@ def prepare_upload_data(
123
144
  """
124
145
  files = {}
125
146
  if file:
126
- filename, file_obj = prepare_file(file)
147
+ filename, file_obj = await prepare_file(file, client)
127
148
  files = {"file": (filename, file_obj)}
128
149
 
129
150
  if config:
@@ -35,10 +35,9 @@ class TaskResponse(BaseModel, Generic[T]):
35
35
  return self
36
36
  return None
37
37
 
38
+ @require_task()
38
39
  async def _poll_request(self) -> dict:
39
40
  try:
40
- if not self._client._client:
41
- raise ValueError("Client not found")
42
41
  r = await self._client._client.get(
43
42
  self.task_url, headers=self._client._headers()
44
43
  )
@@ -51,7 +50,6 @@ class TaskResponse(BaseModel, Generic[T]):
51
50
  raise
52
51
 
53
52
  @anywhere()
54
- @require_task()
55
53
  async def poll(self) -> T:
56
54
  """Poll the task for completion."""
57
55
  while True:
@@ -66,7 +64,7 @@ class TaskResponse(BaseModel, Generic[T]):
66
64
  @require_task()
67
65
  async def update(self, config: Configuration) -> T:
68
66
  """Update the task configuration."""
69
- f = prepare_upload_data(None, config)
67
+ f = await prepare_upload_data(None, config, self._client._client)
70
68
  r = await self._client._client.patch(
71
69
  self.task_url, files=f, headers=self._client._headers()
72
70
  )
@@ -39,3 +39,4 @@ __all__ = [
39
39
  "TaskResponse",
40
40
  "Pipeline",
41
41
  ]
42
+
@@ -1,19 +1,40 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.21
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Lumina AI INC
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
6
27
  Project-URL: Homepage, https://chunkr.ai
7
28
  Description-Content-Type: text/markdown
8
29
  License-File: LICENSE
9
30
  Requires-Dist: httpx>=0.25.0
10
31
  Requires-Dist: pillow>=10.0.0
11
32
  Requires-Dist: pydantic>=2.0.0
12
- Requires-Dist: pytest-asyncio>=0.21.0
13
33
  Requires-Dist: python-dotenv>=0.19.0
14
34
  Provides-Extra: test
15
35
  Requires-Dist: pytest>=7.0.0; extra == "test"
16
36
  Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
37
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
17
38
 
18
39
  # Chunkr Python Client
19
40
 
@@ -70,9 +70,9 @@ async def test_expires_in(client, sample_path):
70
70
  assert response.task_id is not None
71
71
  assert response.status == "Succeeded"
72
72
  assert response.output is not None
73
- await asyncio.sleep(10)
73
+ await asyncio.sleep(11)
74
74
  try:
75
- task = await client.get_task(response.task_id)
75
+ await client.get_task(response.task_id)
76
76
  assert False
77
77
  except Exception as e:
78
78
  print(e)
chunkr_ai-0.0.21/LICENSE DELETED
File without changes
File without changes
File without changes
@@ -1,9 +1,9 @@
1
1
  httpx>=0.25.0
2
2
  pillow>=10.0.0
3
3
  pydantic>=2.0.0
4
- pytest-asyncio>=0.21.0
5
4
  python-dotenv>=0.19.0
6
5
 
7
6
  [test]
8
7
  pytest>=7.0.0
9
8
  pytest-xdist>=3.0.0
9
+ pytest-asyncio>=0.21.0