chunkr-ai 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/chunkr.py CHANGED
@@ -34,7 +34,7 @@ class Chunkr(ChunkrBase):
34
34
  file: Union[str, Path, BinaryIO, Image.Image],
35
35
  config: Configuration = None,
36
36
  ) -> TaskResponse:
37
- files = prepare_upload_data(file, config)
37
+ files = await prepare_upload_data(file, config, self._client)
38
38
  r = await self._client.post(
39
39
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
40
  )
@@ -44,7 +44,7 @@ class Chunkr(ChunkrBase):
44
44
  @anywhere()
45
45
  @ensure_client()
46
46
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
- files = prepare_upload_data(None, config)
47
+ files = await prepare_upload_data(None, config, self._client)
48
48
  r = await self._client.patch(
49
49
  f"{self.url}/api/v1/task/{task_id}",
50
50
  files=files,
@@ -70,8 +70,8 @@ class Chunkr(ChunkrBase):
70
70
  )
71
71
  r.raise_for_status()
72
72
 
73
- @ensure_client()
74
73
  @anywhere()
74
+ @ensure_client()
75
75
  async def cancel_task(self, task_id: str) -> None:
76
76
  r = await self._client.get(
77
77
  f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
@@ -4,13 +4,10 @@ from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
6
  import httpx
7
- import io
8
- import json
9
7
  import os
10
8
  from pathlib import Path
11
9
  from PIL import Image
12
- import requests
13
- from typing import BinaryIO, Tuple, Union
10
+ from typing import BinaryIO, Union
14
11
 
15
12
 
16
13
  class ChunkrBase(HeadersMixin):
@@ -28,130 +25,6 @@ class ChunkrBase(HeadersMixin):
28
25
  self.url = self.url.rstrip("/")
29
26
  self._client = httpx.AsyncClient()
30
27
 
31
- def _prepare_file(
32
- self, file: Union[str, Path, BinaryIO, Image.Image]
33
- ) -> Tuple[str, BinaryIO]:
34
- """Convert various file types into a tuple of (filename, file-like object).
35
-
36
- Args:
37
- file: Input file, can be:
38
- - String or Path to a file
39
- - URL string starting with http:// or https://
40
- - Base64 string
41
- - Opened binary file (mode='rb')
42
- - PIL/Pillow Image object
43
-
44
- Returns:
45
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
46
-
47
- Raises:
48
- FileNotFoundError: If the file path doesn't exist
49
- TypeError: If the file type is not supported
50
- ValueError: If the URL is invalid or unreachable
51
- ValueError: If the MIME type is unsupported
52
- """
53
- # Handle URLs
54
- if isinstance(file, str) and (
55
- file.startswith("http://") or file.startswith("https://")
56
- ):
57
- response = requests.get(file)
58
- response.raise_for_status()
59
- file_obj = io.BytesIO(response.content)
60
- filename = Path(file.split("/")[-1]).name or "downloaded_file"
61
- return filename, file_obj
62
-
63
- # Handle base64 strings
64
- if isinstance(file, str) and "," in file and ";base64," in file:
65
- try:
66
- # Split header and data
67
- header, base64_data = file.split(",", 1)
68
- import base64
69
-
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = "bin"
75
- mime_type = header.split(":")[-1].split(";")[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- "application/pdf": "pdf",
80
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
81
- "application/msword": "doc",
82
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
83
- "application/vnd.ms-powerpoint": "ppt",
84
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
85
- "application/vnd.ms-excel": "xls",
86
- "image/jpeg": "jpg",
87
- "image/png": "png",
88
- "image/jpg": "jpg",
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, "rb")
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or "PNG"
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, "read") and hasattr(file, "seek"):
117
- # Try to get the filename from the file object if possible
118
- name = (
119
- getattr(file, "name", "document")
120
- if hasattr(file, "name")
121
- else "document"
122
- )
123
- return Path(name).name, file
124
-
125
- raise TypeError(f"Unsupported file type: {type(file)}")
126
-
127
- def _prepare_upload_data(
128
- self,
129
- file: Union[str, Path, BinaryIO, Image.Image],
130
- config: Configuration = None,
131
- ) -> Tuple[dict, dict]:
132
- """Prepare files and data dictionaries for upload.
133
-
134
- Args:
135
- file: The file to upload
136
- config: Optional configuration settings
137
-
138
- Returns:
139
- Tuple[dict, dict]: (files dict, data dict) ready for upload
140
- """
141
- filename, file_obj = self._prepare_file(file)
142
- files = {"file": (filename, file_obj)}
143
- data = {}
144
-
145
- if config:
146
- config_dict = config.model_dump(mode="json", exclude_none=True)
147
- for key, value in config_dict.items():
148
- if isinstance(value, dict):
149
- files[key] = (None, json.dumps(value), "application/json")
150
- else:
151
- data[key] = value
152
-
153
- return files, data
154
-
155
28
  @abstractmethod
156
29
  def upload(
157
30
  self,
@@ -22,12 +22,12 @@ def anywhere():
22
22
  def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
23
23
  global _sync_loop
24
24
  try:
25
- loop = asyncio.get_running_loop()
26
- return async_func(*args, **kwargs)
25
+ asyncio.get_running_loop()
26
+ return async_func(*args, **kwargs)
27
27
  except RuntimeError:
28
28
  if _sync_loop is None:
29
29
  _sync_loop = asyncio.new_event_loop()
30
- asyncio.set_event_loop(_sync_loop)
30
+ asyncio.set_event_loop(_sync_loop)
31
31
  try:
32
32
  return _sync_loop.run_until_complete(async_func(*args, **kwargs))
33
33
  finally:
chunkr_ai/api/misc.py CHANGED
@@ -3,16 +3,36 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- import requests
6
+ import httpx
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
8
 
9
- def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object)."""
9
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
+ """Convert various file types into a tuple of (filename, file-like object).
11
+
12
+ Args:
13
+ file: Input file, can be:
14
+ - String or Path to a file
15
+ - URL string starting with http:// or https://
16
+ - Base64 string
17
+ - Opened binary file (mode='rb')
18
+ - PIL/Pillow Image object
19
+
20
+ Returns:
21
+ Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
11
29
  # Handle URLs
12
30
  if isinstance(file, str) and (
13
31
  file.startswith("http://") or file.startswith("https://")
14
32
  ):
15
- response = requests.get(file)
33
+ if not client:
34
+ raise ValueError("Client must be provided to download files from URLs")
35
+ response = client.get(file)
16
36
  response.raise_for_status()
17
37
 
18
38
  # Try to get filename from Content-Disposition header first
@@ -108,9 +128,10 @@ def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, Bi
108
128
  raise TypeError(f"Unsupported file type: {type(file)}")
109
129
 
110
130
 
111
- def prepare_upload_data(
131
+ async def prepare_upload_data(
112
132
  file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
113
133
  config: Optional[Configuration] = None,
134
+ client: httpx.AsyncClient = None,
114
135
  ) -> dict:
115
136
  """Prepare files and data dictionaries for upload.
116
137
 
@@ -123,7 +144,7 @@ def prepare_upload_data(
123
144
  """
124
145
  files = {}
125
146
  if file:
126
- filename, file_obj = prepare_file(file)
147
+ filename, file_obj = await prepare_file(file, client)
127
148
  files = {"file": (filename, file_obj)}
128
149
 
129
150
  if config:
@@ -35,10 +35,9 @@ class TaskResponse(BaseModel, Generic[T]):
35
35
  return self
36
36
  return None
37
37
 
38
+ @require_task()
38
39
  async def _poll_request(self) -> dict:
39
40
  try:
40
- if not self._client._client:
41
- raise ValueError("Client not found")
42
41
  r = await self._client._client.get(
43
42
  self.task_url, headers=self._client._headers()
44
43
  )
@@ -51,7 +50,6 @@ class TaskResponse(BaseModel, Generic[T]):
51
50
  raise
52
51
 
53
52
  @anywhere()
54
- @require_task()
55
53
  async def poll(self) -> T:
56
54
  """Poll the task for completion."""
57
55
  while True:
@@ -66,7 +64,7 @@ class TaskResponse(BaseModel, Generic[T]):
66
64
  @require_task()
67
65
  async def update(self, config: Configuration) -> T:
68
66
  """Update the task configuration."""
69
- f = prepare_upload_data(None, config)
67
+ f = await prepare_upload_data(None, config, self._client._client)
70
68
  r = await self._client._client.patch(
71
69
  self.task_url, files=f, headers=self._client._headers()
72
70
  )
chunkr_ai/models.py CHANGED
@@ -39,3 +39,4 @@ __all__ = [
39
39
  "TaskResponse",
40
40
  "Pipeline",
41
41
  ]
42
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Lumina AI INC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,19 +1,40 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.21
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Lumina AI INC
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
6
27
  Project-URL: Homepage, https://chunkr.ai
7
28
  Description-Content-Type: text/markdown
8
29
  License-File: LICENSE
9
30
  Requires-Dist: httpx>=0.25.0
10
31
  Requires-Dist: pillow>=10.0.0
11
32
  Requires-Dist: pydantic>=2.0.0
12
- Requires-Dist: pytest-asyncio>=0.21.0
13
33
  Requires-Dist: python-dotenv>=0.19.0
14
34
  Provides-Extra: test
15
35
  Requires-Dist: pytest>=7.0.0; extra == "test"
16
36
  Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
37
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
17
38
 
18
39
  # Chunkr Python Client
19
40
 
@@ -0,0 +1,17 @@
1
+ chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
+ chunkr_ai/models.py,sha256=MK8FPbWDj1ynvSHaYuslKCPybxLuAlrsVIM3Eym3kKI,750
3
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
+ chunkr_ai/api/chunkr.py,sha256=XTXJFs0xjYY3w3N4fSQcxtJFBtNfzFYYkh6nDlFz4cY,2714
7
+ chunkr_ai/api/chunkr_base.py,sha256=OkycHDHkdGX953_ab0XdYBnPDzSXYE30L3j52hBb8D0,5046
8
+ chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
+ chunkr_ai/api/decorators.py,sha256=y_Z9z0O2XXiX9z6jWDwdbCPdQyMLnjE0pGkJjHQEv_Q,2652
10
+ chunkr_ai/api/misc.py,sha256=5Q2K713VPwf3S2519KTzjT9PKhTEBgBMk1d8NNnmpZ0,5717
11
+ chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
+ chunkr_ai/api/task_response.py,sha256=aAx7otuvsp-A0U5EaHRkbnRJMoLI8N4lOMo8bS8emJc,3843
13
+ chunkr_ai-0.0.23.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
14
+ chunkr_ai-0.0.23.dist-info/METADATA,sha256=QsO__q1V9SJYz2uugyzj_CZpOuPie6AhLfY39hTNaOM,6952
15
+ chunkr_ai-0.0.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.23.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.23.dist-info/RECORD,,
File without changes
@@ -1,17 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=sEsnoJaL6wz-4R-cYg2WNl6Wmj4Ad_F8B0QuK9t2sZ8,749
3
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
- chunkr_ai/api/chunkr.py,sha256=V56SP8qs7J2QKRCRM9NGlyA1TtDTdFmGYZWbwbFTK_I,2674
7
- chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
8
- chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
- chunkr_ai/api/decorators.py,sha256=UD3Nb0b5EKcwGH2kXb9FPn4GtnJovheoHeF_Gi7WFGk,2657
10
- chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
11
- chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
- chunkr_ai/api/task_response.py,sha256=yjlUOADqf0O9X-yF7lbZuv39ttvitZHQpbGy3DtrJ80,3909
13
- chunkr_ai-0.0.21.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.21.dist-info/METADATA,sha256=_j-KNL9Om4U587y59iKgFjy27sqlQX3wacjx4vKqMmc,5696
15
- chunkr_ai-0.0.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.21.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.21.dist-info/RECORD,,