chunkr-ai 0.0.22__tar.gz → 0.0.23__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (23) hide show
  1. {chunkr_ai-0.0.22/src/chunkr_ai.egg-info → chunkr_ai-0.0.23}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/chunkr.py +2 -2
  4. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/chunkr_base.py +1 -128
  5. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/misc.py +27 -6
  6. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/task_response.py +2 -4
  7. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  8. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/LICENSE +0 -0
  9. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/README.md +0 -0
  10. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/setup.cfg +0 -0
  11. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/__init__.py +0 -0
  12. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/__init__.py +0 -0
  13. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/api.py +0 -0
  14. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/auth.py +0 -0
  15. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/config.py +0 -0
  16. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/decorators.py +0 -0
  17. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/api/protocol.py +0 -0
  18. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai/models.py +0 -0
  19. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/src/chunkr_ai.egg-info/top_level.txt +0 -0
  23. {chunkr_ai-0.0.22 → chunkr_ai-0.0.23}/tests/test_chunkr.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.22
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.22"
7
+ version = "0.0.23"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -34,7 +34,7 @@ class Chunkr(ChunkrBase):
34
34
  file: Union[str, Path, BinaryIO, Image.Image],
35
35
  config: Configuration = None,
36
36
  ) -> TaskResponse:
37
- files = prepare_upload_data(file, config)
37
+ files = await prepare_upload_data(file, config, self._client)
38
38
  r = await self._client.post(
39
39
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
40
  )
@@ -44,7 +44,7 @@ class Chunkr(ChunkrBase):
44
44
  @anywhere()
45
45
  @ensure_client()
46
46
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
- files = prepare_upload_data(None, config)
47
+ files = await prepare_upload_data(None, config, self._client)
48
48
  r = await self._client.patch(
49
49
  f"{self.url}/api/v1/task/{task_id}",
50
50
  files=files,
@@ -4,13 +4,10 @@ from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
6
  import httpx
7
- import io
8
- import json
9
7
  import os
10
8
  from pathlib import Path
11
9
  from PIL import Image
12
- import requests
13
- from typing import BinaryIO, Tuple, Union
10
+ from typing import BinaryIO, Union
14
11
 
15
12
 
16
13
  class ChunkrBase(HeadersMixin):
@@ -28,130 +25,6 @@ class ChunkrBase(HeadersMixin):
28
25
  self.url = self.url.rstrip("/")
29
26
  self._client = httpx.AsyncClient()
30
27
 
31
- def _prepare_file(
32
- self, file: Union[str, Path, BinaryIO, Image.Image]
33
- ) -> Tuple[str, BinaryIO]:
34
- """Convert various file types into a tuple of (filename, file-like object).
35
-
36
- Args:
37
- file: Input file, can be:
38
- - String or Path to a file
39
- - URL string starting with http:// or https://
40
- - Base64 string
41
- - Opened binary file (mode='rb')
42
- - PIL/Pillow Image object
43
-
44
- Returns:
45
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
46
-
47
- Raises:
48
- FileNotFoundError: If the file path doesn't exist
49
- TypeError: If the file type is not supported
50
- ValueError: If the URL is invalid or unreachable
51
- ValueError: If the MIME type is unsupported
52
- """
53
- # Handle URLs
54
- if isinstance(file, str) and (
55
- file.startswith("http://") or file.startswith("https://")
56
- ):
57
- response = requests.get(file)
58
- response.raise_for_status()
59
- file_obj = io.BytesIO(response.content)
60
- filename = Path(file.split("/")[-1]).name or "downloaded_file"
61
- return filename, file_obj
62
-
63
- # Handle base64 strings
64
- if isinstance(file, str) and "," in file and ";base64," in file:
65
- try:
66
- # Split header and data
67
- header, base64_data = file.split(",", 1)
68
- import base64
69
-
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = "bin"
75
- mime_type = header.split(":")[-1].split(";")[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- "application/pdf": "pdf",
80
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
81
- "application/msword": "doc",
82
- "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
83
- "application/vnd.ms-powerpoint": "ppt",
84
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
85
- "application/vnd.ms-excel": "xls",
86
- "image/jpeg": "jpg",
87
- "image/png": "png",
88
- "image/jpg": "jpg",
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, "rb")
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or "PNG"
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, "read") and hasattr(file, "seek"):
117
- # Try to get the filename from the file object if possible
118
- name = (
119
- getattr(file, "name", "document")
120
- if hasattr(file, "name")
121
- else "document"
122
- )
123
- return Path(name).name, file
124
-
125
- raise TypeError(f"Unsupported file type: {type(file)}")
126
-
127
- def _prepare_upload_data(
128
- self,
129
- file: Union[str, Path, BinaryIO, Image.Image],
130
- config: Configuration = None,
131
- ) -> Tuple[dict, dict]:
132
- """Prepare files and data dictionaries for upload.
133
-
134
- Args:
135
- file: The file to upload
136
- config: Optional configuration settings
137
-
138
- Returns:
139
- Tuple[dict, dict]: (files dict, data dict) ready for upload
140
- """
141
- filename, file_obj = self._prepare_file(file)
142
- files = {"file": (filename, file_obj)}
143
- data = {}
144
-
145
- if config:
146
- config_dict = config.model_dump(mode="json", exclude_none=True)
147
- for key, value in config_dict.items():
148
- if isinstance(value, dict):
149
- files[key] = (None, json.dumps(value), "application/json")
150
- else:
151
- data[key] = value
152
-
153
- return files, data
154
-
155
28
  @abstractmethod
156
29
  def upload(
157
30
  self,
@@ -3,16 +3,36 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- import requests
6
+ import httpx
7
7
  from typing import Union, Tuple, BinaryIO, Optional
8
8
 
9
- def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
10
- """Convert various file types into a tuple of (filename, file-like object)."""
9
+ async def prepare_file(file: Union[str, Path, BinaryIO, Image.Image], client: httpx.AsyncClient = None) -> Tuple[str, BinaryIO]:
10
+ """Convert various file types into a tuple of (filename, file-like object).
11
+
12
+ Args:
13
+ file: Input file, can be:
14
+ - String or Path to a file
15
+ - URL string starting with http:// or https://
16
+ - Base64 string
17
+ - Opened binary file (mode='rb')
18
+ - PIL/Pillow Image object
19
+
20
+ Returns:
21
+ Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
22
+
23
+ Raises:
24
+ FileNotFoundError: If the file path doesn't exist
25
+ TypeError: If the file type is not supported
26
+ ValueError: If the URL is invalid or unreachable
27
+ ValueError: If the MIME type is unsupported
28
+ """
11
29
  # Handle URLs
12
30
  if isinstance(file, str) and (
13
31
  file.startswith("http://") or file.startswith("https://")
14
32
  ):
15
- response = requests.get(file)
33
+ if not client:
34
+ raise ValueError("Client must be provided to download files from URLs")
35
+ response = client.get(file)
16
36
  response.raise_for_status()
17
37
 
18
38
  # Try to get filename from Content-Disposition header first
@@ -108,9 +128,10 @@ def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, Bi
108
128
  raise TypeError(f"Unsupported file type: {type(file)}")
109
129
 
110
130
 
111
- def prepare_upload_data(
131
+ async def prepare_upload_data(
112
132
  file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
113
133
  config: Optional[Configuration] = None,
134
+ client: httpx.AsyncClient = None,
114
135
  ) -> dict:
115
136
  """Prepare files and data dictionaries for upload.
116
137
 
@@ -123,7 +144,7 @@ def prepare_upload_data(
123
144
  """
124
145
  files = {}
125
146
  if file:
126
- filename, file_obj = prepare_file(file)
147
+ filename, file_obj = await prepare_file(file, client)
127
148
  files = {"file": (filename, file_obj)}
128
149
 
129
150
  if config:
@@ -35,10 +35,9 @@ class TaskResponse(BaseModel, Generic[T]):
35
35
  return self
36
36
  return None
37
37
 
38
+ @require_task()
38
39
  async def _poll_request(self) -> dict:
39
40
  try:
40
- if not self._client._client:
41
- raise ValueError("Client not found")
42
41
  r = await self._client._client.get(
43
42
  self.task_url, headers=self._client._headers()
44
43
  )
@@ -51,7 +50,6 @@ class TaskResponse(BaseModel, Generic[T]):
51
50
  raise
52
51
 
53
52
  @anywhere()
54
- @require_task()
55
53
  async def poll(self) -> T:
56
54
  """Poll the task for completion."""
57
55
  while True:
@@ -66,7 +64,7 @@ class TaskResponse(BaseModel, Generic[T]):
66
64
  @require_task()
67
65
  async def update(self, config: Configuration) -> T:
68
66
  """Update the task configuration."""
69
- f = prepare_upload_data(None, config)
67
+ f = await prepare_upload_data(None, config, self._client._client)
70
68
  r = await self._client._client.patch(
71
69
  self.task_url, files=f, headers=self._client._headers()
72
70
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.22
3
+ Version: 0.0.23
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
File without changes
File without changes
File without changes