chunkr-ai 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
chunkr_ai/api/chunkr.py CHANGED
@@ -34,7 +34,7 @@ class Chunkr(ChunkrBase):
34
34
  file: Union[str, Path, BinaryIO, Image.Image],
35
35
  config: Configuration = None,
36
36
  ) -> TaskResponse:
37
- files = prepare_upload_data(file, config)
37
+ files = await prepare_upload_data(file, config, self._client)
38
38
  r = await self._client.post(
39
39
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
40
  )
@@ -44,7 +44,7 @@ class Chunkr(ChunkrBase):
44
44
  @anywhere()
45
45
  @ensure_client()
46
46
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
- files = prepare_upload_data(None, config)
47
+ files = await prepare_upload_data(None, config, self._client)
48
48
  r = await self._client.patch(
49
49
  f"{self.url}/api/v1/task/{task_id}",
50
50
  files=files,
@@ -70,8 +70,8 @@ class Chunkr(ChunkrBase):
70
70
  )
71
71
  r.raise_for_status()
72
72
 
73
- @ensure_client()
74
73
  @anywhere()
74
+ @ensure_client()
75
75
  async def cancel_task(self, task_id: str) -> None:
76
76
  r = await self._client.get(
77
77
  f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
@@ -4,13 +4,10 @@ from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
6
  import httpx
7
- import io
8
- import json
9
7
  import os
10
8
  from pathlib import Path
11
9
  from PIL import Image
12
- import requests
13
- from typing import BinaryIO, Tuple, Union
10
+ from typing import BinaryIO, Union
14
11
 
15
12
 
16
13
  class ChunkrBase(HeadersMixin):
@@ -28,130 +25,6 @@ class ChunkrBase(HeadersMixin):
28
25
  self.url = self.url.rstrip("/")
29
26
  self._client = httpx.AsyncClient()
30
27
 
31
- def _prepare_file(
32
- self, file: Union[str, Path, BinaryIO, Image.Image]
33
- ) -> Tuple[str, BinaryIO]:
34
- """Convert various file types into a tuple of (filename, file-like object).
35
-
36
- Args:
37
- file: Input file, can be:
38
- - String or Path to a file
39
- - URL string starting with http:// or https://
40
- - Base64 string
41
- - Opened binary file (mode='rb')
42
- - PIL/Pillow Image object
43
-
44
- Returns:
45
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
46
-
47
- Raises:
48
- FileNotFoundError: If the file path doesn't exist
49
- TypeError: If the file type is not supported
50
- ValueError: If the URL is invalid or unreachable
51
- ValueError: If the MIME type is unsupported
52
- """
53
- # Handle URLs
54
- if isinstance(file, str) and (
55
- file.startswith("http://") or file.startswith("https://")
56
- ):
57
- response = requests.get(file)
58
- response.raise_for_status()
59
- file_obj = io.BytesIO(response.content)
60
- filename = Path(file.split("/")[-1]).name or "downloaded_file"
61
- return filename, file_obj
62
-
63
- # Handle base64 strings
64
- if isinstance(file, str) and "," in file and ";base64," in file:
65
- try:
66
- # Split header and data
67
- header, base64_data = file.split(",", 1)
68
- import base64
69
-
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = "bin"
75
- mime_type = header.split(":")[-1].split(";")[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- "application/pdf": "pdf",
80
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
81
- "application/msword": "doc",
82
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
83
- "application/vnd.ms-powerpoint": "ppt",
84
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
85
- "application/vnd.ms-excel": "xls",
86
- "image/jpeg": "jpg",
87
- "image/png": "png",
88
- "image/jpg": "jpg",
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, "rb")
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or "PNG"
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, "read") and hasattr(file, "seek"):
117
- # Try to get the filename from the file object if possible
118
- name = (
119
- getattr(file, "name", "document")
120
- if hasattr(file, "name")
121
- else "document"
122
- )
123
- return Path(name).name, file
124
-
125
- raise TypeError(f"Unsupported file type: {type(file)}")
126
-
127
- def _prepare_upload_data(
128
- self,
129
- file: Union[str, Path, BinaryIO, Image.Image],
130
- config: Configuration = None,
131
- ) -> Tuple[dict, dict]:
132
- """Prepare files and data dictionaries for upload.
133
-
134
- Args:
135
- file: The file to upload
136
- config: Optional configuration settings
137
-
138
- Returns:
139
- Tuple[dict, dict]: (files dict, data dict) ready for upload
140
- """
141
- filename, file_obj = self._prepare_file(file)
142
- files = {"file": (filename, file_obj)}
143
- data = {}
144
-
145
- if config:
146
- config_dict = config.model_dump(mode="json", exclude_none=True)
147
- for key, value in config_dict.items():
148
- if isinstance(value, dict):
149
- files[key] = (None, json.dumps(value), "application/json")
150
- else:
151
- data[key] = value
152
-
153
- return files, data
154
-
155
28
  @abstractmethod
156
29
  def upload(
157
30
  self,
@@ -22,12 +22,12 @@ def anywhere():
22
22
  def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
23
23
  global _sync_loop
24
24
  try:
25
- loop = asyncio.get_running_loop()
26
- return async_func(*args, **kwargs)
25
+ asyncio.get_running_loop()
26
+ return async_func(*args, **kwargs)
27
27
  except RuntimeError:
28
28
  if _sync_loop is None:
29
29
  _sync_loop = asyncio.new_event_loop()
30
- asyncio.set_event_loop(_sync_loop)
30
+ asyncio.set_event_loop(_sync_loop)
31
31
  try:
32
32
  return _sync_loop.run_until_complete(async_func(*args, **kwargs))
33
33
  finally:
chunkr_ai/api/misc.py CHANGED
@@ -3,16 +3,36 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- import requests
6
+ import httpx
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
8
 
9
- def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object)."""
9
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
+ """Convert various file types into a tuple of (filename, file-like object).
11
+
12
+ Args:
13
+ file: Input file, can be:
14
+ - String or Path to a file
15
+ - URL string starting with http:// or https://
16
+ - Base64 string
17
+ - Opened binary file (mode='rb')
18
+ - PIL/Pillow Image object
19
+
20
+ Returns:
21
+ Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
11
29
  # Handle URLs
12
30
  if isinstance(file, str) and (
13
31
  file.startswith("http://") or file.startswith("https://")
14
32
  ):
15
- response = requests.get(file)
33
+ if not client:
34
+ raise ValueError("Client must be provided to download files from URLs")
35
+ response = await client.get(file)
16
36
  response.raise_for_status()
17
37
 
18
38
  # Try to get filename from Content-Disposition header first
@@ -108,9 +128,10 @@ def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, Bi
108
128
  raise TypeError(f"Unsupported file type: {type(file)}")
109
129
 
110
130
 
111
- def prepare_upload_data(
131
+ async def prepare_upload_data(
112
132
  file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
113
133
  config: Optional[Configuration] = None,
134
+ client: httpx.AsyncClient = None,
114
135
  ) -> dict:
115
136
  """Prepare files and data dictionaries for upload.
116
137
 
@@ -123,7 +144,7 @@ def prepare_upload_data(
123
144
  """
124
145
  files = {}
125
146
  if file:
126
- filename, file_obj = prepare_file(file)
147
+ filename, file_obj = await prepare_file(file, client)
127
148
  files = {"file": (filename, file_obj)}
128
149
 
129
150
  if config:
@@ -35,10 +35,9 @@ class TaskResponse(BaseModel, Generic[T]):
35
35
  return self
36
36
  return None
37
37
 
38
+ @require_task()
38
39
  async def _poll_request(self) -> dict:
39
40
  try:
40
- if not self._client._client:
41
- raise ValueError("Client not found")
42
41
  r = await self._client._client.get(
43
42
  self.task_url, headers=self._client._headers()
44
43
  )
@@ -51,7 +50,6 @@ class TaskResponse(BaseModel, Generic[T]):
51
50
  raise
52
51
 
53
52
  @anywhere()
54
- @require_task()
55
53
  async def poll(self) -> T:
56
54
  """Poll the task for completion."""
57
55
  while True:
@@ -66,7 +64,7 @@ class TaskResponse(BaseModel, Generic[T]):
66
64
  @require_task()
67
65
  async def update(self, config: Configuration) -> T:
68
66
  """Update the task configuration."""
69
- f = prepare_upload_data(None, config)
67
+ f = await prepare_upload_data(None, config, self._client._client)
70
68
  r = await self._client._client.patch(
71
69
  self.task_url, files=f, headers=self._client._headers()
72
70
  )
chunkr_ai/models.py CHANGED
@@ -39,3 +39,4 @@ __all__ = [
39
39
  "TaskResponse",
40
40
  "Pipeline",
41
41
  ]
42
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Lumina AI INC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,19 +1,40 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.21
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Lumina AI INC
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
6
27
  Project-URL: Homepage, https://chunkr.ai
7
28
  Description-Content-Type: text/markdown
8
29
  License-File: LICENSE
9
30
  Requires-Dist: httpx>=0.25.0
10
31
  Requires-Dist: pillow>=10.0.0
11
32
  Requires-Dist: pydantic>=2.0.0
12
- Requires-Dist: pytest-asyncio>=0.21.0
13
33
  Requires-Dist: python-dotenv>=0.19.0
14
34
  Provides-Extra: test
15
35
  Requires-Dist: pytest>=7.0.0; extra == "test"
16
36
  Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
37
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
17
38
 
18
39
  # Chunkr Python Client
19
40
 
@@ -0,0 +1,17 @@
1
+ chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
+ chunkr_ai/models.py,sha256=MK8FPbWDj1ynvSHaYuslKCPybxLuAlrsVIM3Eym3kKI,750
3
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
+ chunkr_ai/api/chunkr.py,sha256=XTXJFs0xjYY3w3N4fSQcxtJFBtNfzFYYkh6nDlFz4cY,2714
7
+ chunkr_ai/api/chunkr_base.py,sha256=OkycHDHkdGX953_ab0XdYBnPDzSXYE30L3j52hBb8D0,5046
8
+ chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
+ chunkr_ai/api/decorators.py,sha256=y_Z9z0O2XXiX9z6jWDwdbCPdQyMLnjE0pGkJjHQEv_Q,2652
10
+ chunkr_ai/api/misc.py,sha256=5Q2K713VPwf3S2519KTzjT9PKhTEBgBMk1d8NNnmpZ0,5717
11
+ chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
+ chunkr_ai/api/task_response.py,sha256=aAx7otuvsp-A0U5EaHRkbnRJMoLI8N4lOMo8bS8emJc,3843
13
+ chunkr_ai-0.0.23.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
14
+ chunkr_ai-0.0.23.dist-info/METADATA,sha256=QsO__q1V9SJYz2uugyzj_CZpOuPie6AhLfY39hTNaOM,6952
15
+ chunkr_ai-0.0.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.23.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.23.dist-info/RECORD,,
File without changes
@@ -1,17 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=sEsnoJaL6wz-4R-cYg2WNl6Wmj4Ad_F8B0QuK9t2sZ8,749
3
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
- chunkr_ai/api/chunkr.py,sha256=V56SP8qs7J2QKRCRM9NGlyA1TtDTdFmGYZWbwbFTK_I,2674
7
- chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
8
- chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
- chunkr_ai/api/decorators.py,sha256=UD3Nb0b5EKcwGH2kXb9FPn4GtnJovheoHeF_Gi7WFGk,2657
10
- chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
11
- chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
- chunkr_ai/api/task_response.py,sha256=yjlUOADqf0O9X-yF7lbZuv39ttvitZHQpbGy3DtrJ80,3909
13
- chunkr_ai-0.0.21.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.21.dist-info/METADATA,sha256=_j-KNL9Om4U587y59iKgFjy27sqlQX3wacjx4vKqMmc,5696
15
- chunkr_ai-0.0.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.21.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.21.dist-info/RECORD,,