chunkr-ai 0.0.35__tar.gz → 0.0.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {chunkr_ai-0.0.35/src/chunkr_ai.egg-info → chunkr_ai-0.0.37}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/chunkr.py +11 -7
  4. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/chunkr_base.py +9 -7
  5. chunkr_ai-0.0.37/src/chunkr_ai/api/misc.py +103 -0
  6. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/task_response.py +31 -24
  7. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  8. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/tests/test_chunkr.py +53 -0
  9. chunkr_ai-0.0.35/src/chunkr_ai/api/misc.py +0 -155
  10. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/LICENSE +0 -0
  11. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/README.md +0 -0
  12. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/setup.cfg +0 -0
  13. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/__init__.py +0 -0
  14. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/__init__.py +0 -0
  15. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/auth.py +0 -0
  16. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/configuration.py +0 -0
  17. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/decorators.py +0 -0
  18. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/api/protocol.py +0 -0
  19. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai/models.py +0 -0
  20. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  21. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  22. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai.egg-info/requires.txt +0 -0
  23. {chunkr_ai-0.0.35 → chunkr_ai-0.0.37}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.35
3
+ Version: 0.0.37
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.35"
7
+ version = "0.0.37"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  from pathlib import Path
2
2
  from PIL import Image
3
- from typing import Union, BinaryIO
3
+ from typing import Union, BinaryIO, Optional
4
4
 
5
5
  from .configuration import Configuration
6
6
  from .decorators import anywhere, ensure_client, retry_on_429
@@ -17,8 +17,9 @@ class Chunkr(ChunkrBase):
17
17
  self,
18
18
  file: Union[str, Path, BinaryIO, Image.Image],
19
19
  config: Configuration = None,
20
+ filename: Optional[str] = None,
20
21
  ) -> TaskResponse:
21
- task = await self.create_task(file, config)
22
+ task = await self.create_task(file, config, filename)
22
23
  return await task.poll()
23
24
 
24
25
  @anywhere()
@@ -34,10 +35,12 @@ class Chunkr(ChunkrBase):
34
35
  self,
35
36
  file: Union[str, Path, BinaryIO, Image.Image],
36
37
  config: Configuration = None,
38
+ filename: Optional[str] = None,
37
39
  ) -> TaskResponse:
38
- files = await prepare_upload_data(file, config, self._client)
40
+ """Create a new task with the given file and configuration."""
41
+ data = await prepare_upload_data(file, filename, config)
39
42
  r = await self._client.post(
40
- f"{self.url}/api/v1/task", files=files, headers=self._headers()
43
+ f"{self.url}/api/v1/task/parse", json=data, headers=self._headers()
41
44
  )
42
45
  r.raise_for_status()
43
46
  return TaskResponse(**r.json()).with_client(self, True, False)
@@ -46,10 +49,11 @@ class Chunkr(ChunkrBase):
46
49
  @ensure_client()
47
50
  @retry_on_429()
48
51
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
49
- files = await prepare_upload_data(None, config, self._client)
52
+ """Update an existing task with new configuration."""
53
+ data = await prepare_upload_data(None, None, config)
50
54
  r = await self._client.patch(
51
- f"{self.url}/api/v1/task/{task_id}",
52
- files=files,
55
+ f"{self.url}/api/v1/task/{task_id}/parse",
56
+ json=data,
53
57
  headers=self._headers(),
54
58
  )
55
59
  r.raise_for_status()
@@ -7,8 +7,7 @@ import httpx
7
7
  import os
8
8
  from pathlib import Path
9
9
  from PIL import Image
10
- from typing import BinaryIO, Union
11
-
10
+ from typing import BinaryIO, Union, Optional
12
11
 
13
12
  class ChunkrBase(HeadersMixin):
14
13
  """Base class with shared functionality for Chunkr API clients.
@@ -20,7 +19,7 @@ class ChunkrBase(HeadersMixin):
20
19
  """
21
20
 
22
21
  def __init__(self, url: str = None, api_key: str = None, raise_on_failure: bool = False):
23
- load_dotenv()
22
+ load_dotenv(override=True)
24
23
  self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
25
24
  self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
26
25
  self.raise_on_failure = raise_on_failure
@@ -38,13 +37,15 @@ class ChunkrBase(HeadersMixin):
38
37
  self,
39
38
  file: Union[str, Path, BinaryIO, Image.Image],
40
39
  config: Configuration = None,
40
+ filename: Optional[str] = None,
41
41
  ) -> TaskResponse:
42
42
  """Upload a file and wait for processing to complete.
43
43
 
44
44
  Args:
45
45
  file: The file to upload.
46
46
  config: Configuration options for processing. Optional.
47
-
47
+ filename: The filename to use for the file. Optional.
48
+
48
49
  Examples:
49
50
  ```python
50
51
  # Upload from file path
@@ -58,7 +59,7 @@ class ChunkrBase(HeadersMixin):
58
59
  await chunkr.upload("https://example.com/document.pdf")
59
60
 
60
61
  # Upload from base64 string (must include MIME type header)
61
- await chunkr.upload("data:application/pdf;base64,JVBERi0...")
62
+ await chunkr.upload("data:application/pdf;base64,JVBERi0...", filename="document.pdf")
62
63
 
63
64
  # Upload an image
64
65
  from PIL import Image
@@ -90,13 +91,14 @@ class ChunkrBase(HeadersMixin):
90
91
  self,
91
92
  file: Union[str, Path, BinaryIO, Image.Image],
92
93
  config: Configuration = None,
94
+ filename: Optional[str] = None,
93
95
  ) -> TaskResponse:
94
96
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
95
97
 
96
98
  Args:
97
99
  file: The file to upload.
98
100
  config: Configuration options for processing. Optional.
99
-
101
+ filename: The filename to use for the file. Optional.
100
102
  Examples:
101
103
  ```
102
104
  # Upload from file path
@@ -110,7 +112,7 @@ class ChunkrBase(HeadersMixin):
110
112
  task = await chunkr.create_task("https://example.com/document.pdf")
111
113
 
112
114
  # Upload from base64 string (must include MIME type header)
113
- task = await chunkr.create_task("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
115
+ task = await chunkr.create_task("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...", filename="document.pdf")
114
116
 
115
117
  # Upload an image
116
118
  from PIL import Image
@@ -0,0 +1,103 @@
1
+ from .configuration import Configuration
2
+ import base64
3
+ import io
4
+ from pathlib import Path
5
+ from PIL import Image
6
+ from typing import Union, Tuple, BinaryIO, Optional
7
+
8
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[Optional[str], str]:
9
+ """Convert various file types into a tuple of (filename, file content).
10
+
11
+ Args:
12
+ file: Input file, can be:
13
+ - URL string starting with http:// or https://
14
+ - Base64 string
15
+ - Local file path (will be converted to base64)
16
+ - Opened binary file (will be converted to base64)
17
+ - PIL/Pillow Image object (will be converted to base64)
18
+
19
+ Returns:
20
+ Tuple[Optional[str], str]: (filename, content) where content is either a URL or base64 string
21
+ The filename may be None for URLs, base64 strings, and PIL Images
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
29
+ # Handle strings
30
+ if isinstance(file, str):
31
+ if file.startswith(('http://', 'https://')):
32
+ return None, file
33
+ try:
34
+ base64.b64decode(file)
35
+ return None, file
36
+ except:
37
+ try:
38
+ file = Path(file)
39
+ except:
40
+ raise ValueError("File must be a valid path, URL, or base64 string")
41
+
42
+ # Handle file paths - convert to base64
43
+ if isinstance(file, Path):
44
+ path = Path(file).resolve()
45
+ if not path.exists():
46
+ raise FileNotFoundError(f"File not found: {file}")
47
+
48
+ with open(path, "rb") as f:
49
+ file_content = f.read()
50
+ file_ext = path.suffix.lower().lstrip('.')
51
+ if not file_ext:
52
+ raise ValueError("File must have an extension")
53
+ base64_str = base64.b64encode(file_content).decode()
54
+ return path.name, base64_str
55
+
56
+ # Handle PIL Images - convert to base64
57
+ if isinstance(file, Image.Image):
58
+ img_byte_arr = io.BytesIO()
59
+ format = file.format or "PNG"
60
+ file.save(img_byte_arr, format=format)
61
+ img_byte_arr.seek(0)
62
+ base64_str = base64.b64encode(img_byte_arr.getvalue()).decode()
63
+ return None, base64_str
64
+
65
+ # Handle file-like objects - convert to base64
66
+ if hasattr(file, "read") and hasattr(file, "seek"):
67
+ file.seek(0)
68
+ file_content = file.read()
69
+ name = getattr(file, "name", "document")
70
+ file_ext = Path(name).suffix.lower().lstrip('.')
71
+ if not file_ext:
72
+ raise ValueError("File must have an extension")
73
+ base64_str = base64.b64encode(file_content).decode()
74
+ return Path(name).name, base64_str
75
+
76
+ raise TypeError(f"Unsupported file type: {type(file)}")
77
+
78
+
79
+ async def prepare_upload_data(
80
+ file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
81
+ filename: Optional[str] = None,
82
+ config: Optional[Configuration] = None,
83
+ ) -> dict:
84
+ """Prepare data dictionary for upload.
85
+
86
+ Args:
87
+ file: The file to upload
88
+ config: Optional configuration settings
89
+ client: HTTP client for downloading remote files
90
+
91
+ Returns:
92
+ dict: JSON-serializable data dictionary ready for upload
93
+ """
94
+ data = {}
95
+ if file:
96
+ processed_filename, processed_file = await prepare_file(file)
97
+ data["file"] = processed_file
98
+ data["file_name"] = filename or processed_filename
99
+
100
+ if config:
101
+ data.update(config.model_dump(mode="json", exclude_none=True))
102
+
103
+ return data
@@ -74,9 +74,11 @@ class TaskResponse(BaseModel, Generic[T]):
74
74
  @retry_on_429()
75
75
  async def update(self, config: Configuration) -> T:
76
76
  """Update the task configuration."""
77
- f = await prepare_upload_data(None, config, self._client._client)
77
+ data = await prepare_upload_data(None, None, config)
78
78
  r = await self._client._client.patch(
79
- self.task_url, files=f, headers=self._client._headers()
79
+ f"{self.task_url}/parse",
80
+ json=data,
81
+ headers=self._client._headers()
80
82
  )
81
83
  r.raise_for_status()
82
84
  updated = TaskResponse(**r.json()).with_client(self._client)
@@ -103,6 +105,29 @@ class TaskResponse(BaseModel, Generic[T]):
103
105
  r.raise_for_status()
104
106
  return await self.poll()
105
107
 
108
+ def _write_to_file(self, content: str | dict, output_file: str, is_json: bool = False) -> None:
109
+ """Helper method to write content to a file
110
+
111
+ Args:
112
+ content: Content to write (string or dict for JSON)
113
+ output_file: Path to save the content
114
+ is_json: Whether the content should be written as JSON
115
+ """
116
+ class DateTimeEncoder(json.JSONEncoder):
117
+ def default(self, obj):
118
+ if isinstance(obj, datetime):
119
+ return obj.isoformat()
120
+ return super().default(obj)
121
+ if output_file:
122
+ directory = os.path.dirname(output_file)
123
+ if directory:
124
+ os.makedirs(directory, exist_ok=True)
125
+ with open(output_file, "w", encoding="utf-8") as f:
126
+ if is_json:
127
+ json.dump(content, f, cls=DateTimeEncoder, indent=2)
128
+ else:
129
+ f.write(content)
130
+
106
131
  def html(self, output_file: str = None) -> str:
107
132
  """Get the full HTML of the task
108
133
 
@@ -110,10 +135,7 @@ class TaskResponse(BaseModel, Generic[T]):
110
135
  output_file (str, optional): Path to save the HTML content. Defaults to None.
111
136
  """
112
137
  content = self._get_content("html")
113
- if output_file:
114
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
115
- with open(output_file, "w", encoding="utf-8") as f:
116
- f.write(content)
138
+ self._write_to_file(content, output_file)
117
139
  return content
118
140
 
119
141
  def markdown(self, output_file: str = None) -> str:
@@ -123,10 +145,7 @@ class TaskResponse(BaseModel, Generic[T]):
123
145
  output_file (str, optional): Path to save the markdown content. Defaults to None.
124
146
  """
125
147
  content = self._get_content("markdown")
126
- if output_file:
127
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
128
- with open(output_file, "w", encoding="utf-8") as f:
129
- f.write(content)
148
+ self._write_to_file(content, output_file)
130
149
  return content
131
150
 
132
151
  def content(self, output_file: str = None) -> str:
@@ -136,10 +155,7 @@ class TaskResponse(BaseModel, Generic[T]):
136
155
  output_file (str, optional): Path to save the content. Defaults to None.
137
156
  """
138
157
  content = self._get_content("content")
139
- if output_file:
140
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
141
- with open(output_file, "w", encoding="utf-8") as f:
142
- f.write(content)
158
+ self._write_to_file(content, output_file)
143
159
  return content
144
160
 
145
161
  def json(self, output_file: str = None) -> dict:
@@ -148,17 +164,8 @@ class TaskResponse(BaseModel, Generic[T]):
148
164
  Args:
149
165
  output_file (str, optional): Path to save the task data as JSON. Defaults to None.
150
166
  """
151
- class DateTimeEncoder(json.JSONEncoder):
152
- def default(self, obj):
153
- if isinstance(obj, datetime):
154
- return obj.isoformat()
155
- return super().default(obj)
156
-
157
167
  data = self.model_dump()
158
- if output_file:
159
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
160
- with open(output_file, "w", encoding="utf-8") as f:
161
- json.dump(data, f, cls=DateTimeEncoder, indent=2)
168
+ self._write_to_file(data, output_file, is_json=True)
162
169
  return data
163
170
 
164
171
  def _get_content(self, t: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.35
3
+ Version: 0.0.37
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -2,6 +2,7 @@ import pytest
2
2
  from pathlib import Path
3
3
  from PIL import Image
4
4
  import asyncio
5
+ import base64
5
6
 
6
7
  from chunkr_ai import Chunkr
7
8
  from chunkr_ai.models import (
@@ -67,6 +68,8 @@ async def test_send_pil_image(client, sample_image):
67
68
  response = await client.upload(sample_image)
68
69
  assert response.task_id is not None
69
70
  assert response.status == "Succeeded"
71
+ assert response.output is not None
72
+ assert response.output is not None
70
73
 
71
74
  @pytest.mark.asyncio
72
75
  async def test_ocr_auto(client, sample_path):
@@ -220,3 +223,53 @@ async def test_task_operations_after_client_close(client, sample_path):
220
223
  await client.close()
221
224
  result = await task.poll()
222
225
  assert result.status == "Succeeded"
226
+
227
+ @pytest.mark.asyncio
228
+ async def test_send_base64_file(client, sample_path):
229
+ # Read file and convert to base64
230
+ with open(sample_path, "rb") as f:
231
+ base64_content = base64.b64encode(f.read()).decode('utf-8')
232
+ response = await client.upload(base64_content)
233
+ assert response.task_id is not None
234
+ assert response.status == "Succeeded"
235
+ assert response.output is not None
236
+
237
+ @pytest.mark.asyncio
238
+ async def test_send_base64_file_with_filename(client, sample_path):
239
+ # Read file and convert to base64
240
+ with open(sample_path, "rb") as f:
241
+ base64_content = base64.b64encode(f.read()).decode('utf-8')
242
+
243
+ response = await client.upload(base64_content, filename="test.pdf")
244
+ assert response.task_id is not None
245
+ assert response.status == "Succeeded"
246
+ assert response.output is not None
247
+
248
+ @pytest.mark.asyncio
249
+ async def test_output_files_no_dir(client, sample_path, tmp_path):
250
+ await client.upload(sample_path)
251
+
252
+ html_file = tmp_path / "output.html"
253
+ md_file = tmp_path / "output.md"
254
+ content_file = tmp_path / "output.txt"
255
+ json_file = tmp_path / "output.json"
256
+
257
+ assert html_file.exists()
258
+ assert md_file.exists()
259
+ assert content_file.exists()
260
+ assert json_file.exists()
261
+
262
+ @pytest.mark.asyncio
263
+ async def test_output_files_with_dirs(client, sample_path, tmp_path):
264
+ await client.upload(sample_path)
265
+
266
+ nested_dir = tmp_path / "nested" / "output" / "dir"
267
+ html_file = nested_dir / "output.html"
268
+ md_file = nested_dir / "output.md"
269
+ content_file = nested_dir / "output.txt"
270
+ json_file = nested_dir / "output.json"
271
+
272
+ assert html_file.exists()
273
+ assert md_file.exists()
274
+ assert content_file.exists()
275
+ assert json_file.exists()
@@ -1,155 +0,0 @@
1
- from .configuration import Configuration
2
- import io
3
- import json
4
- from pathlib import Path
5
- from PIL import Image
6
- import httpx
7
- from typing import Union, Tuple, BinaryIO, Optional
8
-
9
- async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object).
11
-
12
- Args:
13
- file: Input file, can be:
14
- - String or Path to a file
15
- - URL string starting with http:// or https://
16
- - Base64 string
17
- - Opened binary file (mode='rb')
18
- - PIL/Pillow Image object
19
-
20
- Returns:
21
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
-
23
- Raises:
24
- FileNotFoundError: If the file path doesn't exist
25
- TypeError: If the file type is not supported
26
- ValueError: If the URL is invalid or unreachable
27
- ValueError: If the MIME type is unsupported
28
- """
29
- # Handle URLs
30
- if isinstance(file, str) and (
31
- file.startswith("http://") or file.startswith("https://")
32
- ):
33
- if not client:
34
- raise ValueError("Client must be provided to download files from URLs")
35
- response = await client.get(file)
36
- response.raise_for_status()
37
-
38
- # Try to get filename from Content-Disposition header first
39
- filename = None
40
- content_disposition = response.headers.get("Content-Disposition")
41
- if content_disposition and "filename=" in content_disposition:
42
- filename = content_disposition.split("filename=")[-1].strip("\"'")
43
-
44
- # If no Content-Disposition, try to get clean filename from URL path
45
- if not filename:
46
- from urllib.parse import urlparse, unquote
47
-
48
- parsed_url = urlparse(file)
49
- path = unquote(parsed_url.path)
50
- filename = Path(path).name if path else None
51
-
52
- # Fallback to default name if we couldn't extract one
53
- filename = filename or "downloaded_file"
54
-
55
- # Sanitize filename: remove invalid characters and limit length
56
- import re
57
-
58
- filename = re.sub(
59
- r'[<>:"/\\|?*%]', "_", filename
60
- ) # Replace invalid chars with underscore
61
- filename = re.sub(r"\s+", "_", filename) # Replace whitespace with underscore
62
- filename = filename.strip("._") # Remove leading/trailing dots and underscores
63
- filename = filename[:255] # Limit length to 255 characters
64
-
65
- file_obj = io.BytesIO(response.content)
66
- return filename, file_obj
67
-
68
- # Handle base64 strings
69
- if isinstance(file, str) and "," in file and ";base64," in file:
70
- try:
71
- # Split header and data
72
- header, base64_data = file.split(",", 1)
73
- import base64
74
-
75
- file_bytes = base64.b64decode(base64_data)
76
- file_obj = io.BytesIO(file_bytes)
77
-
78
- # Try to determine format from header
79
- format = "bin"
80
- mime_type = header.split(":")[-1].split(";")[0].lower()
81
-
82
- # Map MIME types to file extensions
83
- mime_to_ext = {
84
- "application/pdf": "pdf",
85
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
86
- "application/msword": "doc",
87
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
88
- "application/vnd.ms-powerpoint": "ppt",
89
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
90
- "application/vnd.ms-excel": "xls",
91
- "image/jpeg": "jpg",
92
- "image/png": "png",
93
- "image/jpg": "jpg",
94
- }
95
-
96
- if mime_type in mime_to_ext:
97
- format = mime_to_ext[mime_type]
98
- else:
99
- raise ValueError(f"Unsupported MIME type: {mime_type}")
100
-
101
- return f"file.{format}", file_obj
102
- except Exception as e:
103
- raise ValueError(f"Invalid base64 string: {str(e)}")
104
-
105
- # Handle file paths
106
- if isinstance(file, (str, Path)):
107
- path = Path(file).resolve()
108
- if not path.exists():
109
- raise FileNotFoundError(f"File not found: {file}")
110
- return path.name, open(path, "rb")
111
-
112
- # Handle PIL Images
113
- if isinstance(file, Image.Image):
114
- img_byte_arr = io.BytesIO()
115
- format = file.format or "PNG"
116
- file.save(img_byte_arr, format=format)
117
- img_byte_arr.seek(0)
118
- return f"image.{format.lower()}", img_byte_arr
119
-
120
- # Handle file-like objects
121
- if hasattr(file, "read") and hasattr(file, "seek"):
122
- # Try to get the filename from the file object if possible
123
- name = (
124
- getattr(file, "name", "document") if hasattr(file, "name") else "document"
125
- )
126
- return Path(name).name, file
127
-
128
- raise TypeError(f"Unsupported file type: {type(file)}")
129
-
130
-
131
- async def prepare_upload_data(
132
- file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
133
- config: Optional[Configuration] = None,
134
- client: httpx.AsyncClient = None,
135
- ) -> dict:
136
- """Prepare files and data dictionaries for upload.
137
-
138
- Args:
139
- file: The file to upload
140
- config: Optional configuration settings
141
-
142
- Returns:
143
- dict: (files dict) ready for upload
144
- """
145
- files = {}
146
- if file:
147
- filename, file_obj = await prepare_file(file, client)
148
- files = {"file": (filename, file_obj)}
149
-
150
- if config:
151
- config_dict = config.model_dump(mode="json", exclude_none=True)
152
- for key, value in config_dict.items():
153
- files[key] = (None, json.dumps(value), "application/json")
154
-
155
- return files
File without changes
File without changes
File without changes