chunkr-ai 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/chunkr.py CHANGED
@@ -34,7 +34,7 @@ class Chunkr(ChunkrBase):
34
34
  file: Union[str, Path, BinaryIO, Image.Image],
35
35
  config: Configuration = None,
36
36
  ) -> TaskResponse:
37
- files = prepare_upload_data(file, config)
37
+ files = await prepare_upload_data(file, config, self._client)
38
38
  r = await self._client.post(
39
39
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
40
  )
@@ -44,7 +44,7 @@ class Chunkr(ChunkrBase):
44
44
  @anywhere()
45
45
  @ensure_client()
46
46
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
- files = prepare_upload_data(None, config)
47
+ files = await prepare_upload_data(None, config, self._client)
48
48
  r = await self._client.patch(
49
49
  f"{self.url}/api/v1/task/{task_id}",
50
50
  files=files,
@@ -4,13 +4,10 @@ from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
6
  import httpx
7
- import io
8
- import json
9
7
  import os
10
8
  from pathlib import Path
11
9
  from PIL import Image
12
- import requests
13
- from typing import BinaryIO, Tuple, Union
10
+ from typing import BinaryIO, Union
14
11
 
15
12
 
16
13
  class ChunkrBase(HeadersMixin):
@@ -28,130 +25,6 @@ class ChunkrBase(HeadersMixin):
28
25
  self.url = self.url.rstrip("/")
29
26
  self._client = httpx.AsyncClient()
30
27
 
31
- def _prepare_file(
32
- self, file: Union[str, Path, BinaryIO, Image.Image]
33
- ) -> Tuple[str, BinaryIO]:
34
- """Convert various file types into a tuple of (filename, file-like object).
35
-
36
- Args:
37
- file: Input file, can be:
38
- - String or Path to a file
39
- - URL string starting with http:// or https://
40
- - Base64 string
41
- - Opened binary file (mode='rb')
42
- - PIL/Pillow Image object
43
-
44
- Returns:
45
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
46
-
47
- Raises:
48
- FileNotFoundError: If the file path doesn't exist
49
- TypeError: If the file type is not supported
50
- ValueError: If the URL is invalid or unreachable
51
- ValueError: If the MIME type is unsupported
52
- """
53
- # Handle URLs
54
- if isinstance(file, str) and (
55
- file.startswith("http://") or file.startswith("https://")
56
- ):
57
- response = requests.get(file)
58
- response.raise_for_status()
59
- file_obj = io.BytesIO(response.content)
60
- filename = Path(file.split("/")[-1]).name or "downloaded_file"
61
- return filename, file_obj
62
-
63
- # Handle base64 strings
64
- if isinstance(file, str) and "," in file and ";base64," in file:
65
- try:
66
- # Split header and data
67
- header, base64_data = file.split(",", 1)
68
- import base64
69
-
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = "bin"
75
- mime_type = header.split(":")[-1].split(";")[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- "application/pdf": "pdf",
80
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
81
- "application/msword": "doc",
82
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
83
- "application/vnd.ms-powerpoint": "ppt",
84
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
85
- "application/vnd.ms-excel": "xls",
86
- "image/jpeg": "jpg",
87
- "image/png": "png",
88
- "image/jpg": "jpg",
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, "rb")
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or "PNG"
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, "read") and hasattr(file, "seek"):
117
- # Try to get the filename from the file object if possible
118
- name = (
119
- getattr(file, "name", "document")
120
- if hasattr(file, "name")
121
- else "document"
122
- )
123
- return Path(name).name, file
124
-
125
- raise TypeError(f"Unsupported file type: {type(file)}")
126
-
127
- def _prepare_upload_data(
128
- self,
129
- file: Union[str, Path, BinaryIO, Image.Image],
130
- config: Configuration = None,
131
- ) -> Tuple[dict, dict]:
132
- """Prepare files and data dictionaries for upload.
133
-
134
- Args:
135
- file: The file to upload
136
- config: Optional configuration settings
137
-
138
- Returns:
139
- Tuple[dict, dict]: (files dict, data dict) ready for upload
140
- """
141
- filename, file_obj = self._prepare_file(file)
142
- files = {"file": (filename, file_obj)}
143
- data = {}
144
-
145
- if config:
146
- config_dict = config.model_dump(mode="json", exclude_none=True)
147
- for key, value in config_dict.items():
148
- if isinstance(value, dict):
149
- files[key] = (None, json.dumps(value), "application/json")
150
- else:
151
- data[key] = value
152
-
153
- return files, data
154
-
155
28
  @abstractmethod
156
29
  def upload(
157
30
  self,
chunkr_ai/api/misc.py CHANGED
@@ -3,16 +3,36 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- import requests
6
+ import httpx
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
8
 
9
- def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object)."""
9
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
+ """Convert various file types into a tuple of (filename, file-like object).
11
+
12
+ Args:
13
+ file: Input file, can be:
14
+ - String or Path to a file
15
+ - URL string starting with http:// or https://
16
+ - Base64 string
17
+ - Opened binary file (mode='rb')
18
+ - PIL/Pillow Image object
19
+
20
+ Returns:
21
+ Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
11
29
  # Handle URLs
12
30
  if isinstance(file, str) and (
13
31
  file.startswith("http://") or file.startswith("https://")
14
32
  ):
15
- response = requests.get(file)
33
+ if not client:
34
+ raise ValueError("Client must be provided to download files from URLs")
35
+ response = client.get(file)
16
36
  response.raise_for_status()
17
37
 
18
38
  # Try to get filename from Content-Disposition header first
@@ -108,9 +128,10 @@ def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, Bi
108
128
  raise TypeError(f"Unsupported file type: {type(file)}")
109
129
 
110
130
 
111
- def prepare_upload_data(
131
+ async def prepare_upload_data(
112
132
  file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
113
133
  config: Optional[Configuration] = None,
134
+ client: httpx.AsyncClient = None,
114
135
  ) -> dict:
115
136
  """Prepare files and data dictionaries for upload.
116
137
 
@@ -123,7 +144,7 @@ def prepare_upload_data(
123
144
  """
124
145
  files = {}
125
146
  if file:
126
- filename, file_obj = prepare_file(file)
147
+ filename, file_obj = await prepare_file(file, client)
127
148
  files = {"file": (filename, file_obj)}
128
149
 
129
150
  if config:
@@ -35,10 +35,9 @@ class TaskResponse(BaseModel, Generic[T]):
35
35
  return self
36
36
  return None
37
37
 
38
+ @require_task()
38
39
  async def _poll_request(self) -> dict:
39
40
  try:
40
- if not self._client._client:
41
- raise ValueError("Client not found")
42
41
  r = await self._client._client.get(
43
42
  self.task_url, headers=self._client._headers()
44
43
  )
@@ -51,7 +50,6 @@ class TaskResponse(BaseModel, Generic[T]):
51
50
  raise
52
51
 
53
52
  @anywhere()
54
- @require_task()
55
53
  async def poll(self) -> T:
56
54
  """Poll the task for completion."""
57
55
  while True:
@@ -66,7 +64,7 @@ class TaskResponse(BaseModel, Generic[T]):
66
64
  @require_task()
67
65
  async def update(self, config: Configuration) -> T:
68
66
  """Update the task configuration."""
69
- f = prepare_upload_data(None, config)
67
+ f = await prepare_upload_data(None, config, self._client._client)
70
68
  r = await self._client._client.patch(
71
69
  self.task_url, files=f, headers=self._client._headers()
72
70
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.22
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -3,15 +3,15 @@ chunkr_ai/models.py,sha256=MK8FPbWDj1ynvSHaYuslKCPybxLuAlrsVIM3Eym3kKI,750
3
3
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
- chunkr_ai/api/chunkr.py,sha256=ZjDVUeR1KPiLLHbj7AJipt-prBTw7a_GvjJ0nd7JV7c,2674
7
- chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
6
+ chunkr_ai/api/chunkr.py,sha256=XTXJFs0xjYY3w3N4fSQcxtJFBtNfzFYYkh6nDlFz4cY,2714
7
+ chunkr_ai/api/chunkr_base.py,sha256=OkycHDHkdGX953_ab0XdYBnPDzSXYE30L3j52hBb8D0,5046
8
8
  chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
9
  chunkr_ai/api/decorators.py,sha256=y_Z9z0O2XXiX9z6jWDwdbCPdQyMLnjE0pGkJjHQEv_Q,2652
10
- chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
10
+ chunkr_ai/api/misc.py,sha256=5Q2K713VPwf3S2519KTzjT9PKhTEBgBMk1d8NNnmpZ0,5717
11
11
  chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
- chunkr_ai/api/task_response.py,sha256=yjlUOADqf0O9X-yF7lbZuv39ttvitZHQpbGy3DtrJ80,3909
13
- chunkr_ai-0.0.22.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
14
- chunkr_ai-0.0.22.dist-info/METADATA,sha256=ro8wcqBnNGLkIhh7XrbSxdl4zaWXhA0d5SUgBQteAec,6952
15
- chunkr_ai-0.0.22.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.22.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.22.dist-info/RECORD,,
12
+ chunkr_ai/api/task_response.py,sha256=aAx7otuvsp-A0U5EaHRkbnRJMoLI8N4lOMo8bS8emJc,3843
13
+ chunkr_ai-0.0.23.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
14
+ chunkr_ai-0.0.23.dist-info/METADATA,sha256=QsO__q1V9SJYz2uugyzj_CZpOuPie6AhLfY39hTNaOM,6952
15
+ chunkr_ai-0.0.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.23.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.23.dist-info/RECORD,,