chunkr-ai 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/api.py ADDED
File without changes
chunkr_ai/api/base.py ADDED
@@ -0,0 +1,173 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import io
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from PIL import Image
11
+ import requests
12
+ from typing import BinaryIO, Tuple, Union
13
+
14
class ChunkrBase(HeadersMixin):
    """Base class with shared functionality for Chunkr API clients.

    Resolves the API endpoint and key, and normalises the supported upload
    inputs (path, URL, data URI, open binary file, PIL image) into the
    ``files``/``data`` dictionaries used for a multipart upload.

    NOTE(review): ``@abstractmethod`` is only enforced at instantiation time
    when the class uses ``ABCMeta``; whether ``HeadersMixin`` provides that
    is not visible from this file - confirm.
    """

    # MIME types accepted in a ``data:<mime>;base64,<payload>`` string, mapped
    # to the extension used for the synthetic upload filename.
    _MIME_TO_EXT = {
        'application/pdf': 'pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
        'application/vnd.ms-powerpoint': 'ppt',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
        'application/vnd.ms-excel': 'xls',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/jpg': 'jpg',
    }

    def __init__(self, url: str = None, api_key: str = None):
        """Resolve the API base URL and key.

        Explicit arguments win over the ``CHUNKR_URL`` / ``CHUNKR_API_KEY``
        environment variables (a ``.env`` file is loaded first); the URL
        falls back to ``https://api.chunkr.ai``.

        Args:
            url: Base URL of the Chunkr API. Trailing slashes are stripped.
            api_key: API key used to authenticate requests.

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        load_dotenv()
        self.url = (
            url
            or os.getenv('CHUNKR_URL')
            or 'https://api.chunkr.ai'
        )
        self._api_key = api_key or os.getenv('CHUNKR_API_KEY')
        if not self._api_key:
            raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")

        # Normalise so endpoint paths can be appended with a single "/".
        self.url = self.url.rstrip("/")

    def _prepare_file(
        self,
        file: Union[str, Path, BinaryIO, Image.Image]
    ) -> Tuple[str, BinaryIO]:
        """Convert various file types into a tuple of (filename, file-like object).

        Args:
            file: Input file, can be:
                - String or Path to a file
                - URL string starting with http:// or https://
                - Data URI string of the form ``data:<mime>;base64,<payload>``
                - Opened binary file (mode='rb')
                - PIL/Pillow Image object

        Returns:
            Tuple[str, BinaryIO]: (filename, file-like object) ready for upload.
            For a path input the returned object is a freshly opened file
            handle which this method does not close.

        Raises:
            FileNotFoundError: If the file path doesn't exist
            TypeError: If the file type is not supported
            ValueError: If the data URI payload is not valid base64
            ValueError: If the data URI MIME type is unsupported
            requests.exceptions.RequestException: If downloading a URL fails
                (including ``HTTPError`` from a non-success status code)
        """
        # Handle URLs
        if isinstance(file, str) and file.startswith(('http://', 'https://')):
            from urllib.parse import urlparse

            # NOTE(review): no timeout is passed, so an unresponsive server
            # can block this call indefinitely - consider adding one.
            response = requests.get(file)
            response.raise_for_status()
            # Use only the URL path so the query string / fragment (and, for
            # bare-host URLs, the hostname) never leak into the filename.
            last_segment = urlparse(file).path.rsplit('/', 1)[-1]
            filename = Path(last_segment).name or 'downloaded_file'
            return filename, io.BytesIO(response.content)

        # Handle base64 data URIs
        if isinstance(file, str) and ';base64,' in file:
            import base64

            # Split header ("data:<mime>;base64") from the payload.
            header, base64_data = file.split(',', 1)
            mime_type = header.split(':')[-1].split(';')[0].lower()

            extension = self._MIME_TO_EXT.get(mime_type)
            if extension is None:
                raise ValueError(f"Unsupported MIME type: {mime_type}")

            # Only the decode step is guarded, so the MIME error above is not
            # re-labelled as an invalid-base64 error. binascii.Error is a
            # ValueError subclass, so this covers malformed payloads too.
            try:
                file_bytes = base64.b64decode(base64_data)
            except ValueError as e:
                raise ValueError(f"Invalid base64 string: {str(e)}") from e

            return f"file.{extension}", io.BytesIO(file_bytes)

        # Handle file paths
        if isinstance(file, (str, Path)):
            path = Path(file).resolve()
            if not path.exists():
                raise FileNotFoundError(f"File not found: {file}")
            # NOTE(review): the handle is handed to the caller unclosed; the
            # code that performs the upload is responsible for closing it.
            return path.name, open(path, 'rb')

        # Handle PIL Images
        if isinstance(file, Image.Image):
            img_byte_arr = io.BytesIO()
            # Images created in memory have no format; default to PNG.
            image_format = file.format or 'PNG'
            file.save(img_byte_arr, format=image_format)
            img_byte_arr.seek(0)
            return f"image.{image_format.lower()}", img_byte_arr

        # Handle file-like objects
        if hasattr(file, 'read') and hasattr(file, 'seek'):
            # Some file objects expose a non-string name (e.g. an integer
            # file descriptor) or none at all; fall back to a generic name.
            raw_name = getattr(file, 'name', None)
            if isinstance(raw_name, (str, os.PathLike)):
                filename = Path(raw_name).name or 'document'
            else:
                filename = 'document'
            return filename, file

        raise TypeError(f"Unsupported file type: {type(file)}")

    def _prepare_upload_data(
        self,
        file: Union[str, Path, BinaryIO, Image.Image],
        config: Configuration = None
    ) -> Tuple[dict, dict]:
        """Prepare files and data dictionaries for upload.

        Args:
            file: The file to upload
            config: Optional configuration settings

        Returns:
            Tuple[dict, dict]: (files dict, data dict) ready for upload.
            Dict-valued configuration entries are JSON-encoded and placed in
            the files dict as ``application/json`` parts; all other entries
            go into the data dict unchanged.
        """
        filename, file_obj = self._prepare_file(file)
        files = {"file": (filename, file_obj)}
        data = {}

        if config:
            config_dict = config.model_dump(mode="json", exclude_none=True)
            for key, value in config_dict.items():
                if isinstance(value, dict):
                    files[key] = (None, json.dumps(value), 'application/json')
                else:
                    data[key] = value

        return files, data

    @abstractmethod
    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Must be implemented by subclasses.
        """
        pass
chunkr_ai/api/chunkr.py CHANGED
@@ -15,86 +15,17 @@ class Chunkr(ChunkrBase):
15
15
  self._session = requests.Session()
16
16
 
17
17
  def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
18
- """Upload a file and wait for processing to complete.
19
-
20
- Args:
21
- file: The file to upload.
22
- config: Configuration options for processing. Optional.
23
-
24
- Examples:
25
- ```
26
- # Upload from file path
27
- chunkr.upload("document.pdf")
28
-
29
- # Upload from URL
30
- chunkr.upload("https://example.com/document.pdf")
31
-
32
- # Upload from base64 string (must include MIME type header)
33
- chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
34
-
35
- # Upload from opened file
36
- with open("document.pdf", "rb") as f:
37
- chunkr.upload(f)
38
-
39
- # Upload an image
40
- from PIL import Image
41
- img = Image.open("photo.jpg")
42
- chunkr.upload(img)
43
- ```
44
- Returns:
45
- TaskResponse: The completed task response
46
- """
47
18
  task = self.create_task(file, config)
48
19
  return task.poll()
49
20
 
50
21
  def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
- """Update a task by its ID and wait for processing to complete.
52
-
53
- Args:
54
- task_id: The ID of the task to update
55
- config: Configuration options for processing. Optional.
56
-
57
- Returns:
58
- TaskResponse: The updated task response
59
- """
60
22
  task = self.update_task(task_id, config)
61
23
  return task.poll()
62
24
 
63
25
  def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
64
- """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`
65
-
66
- Args:
67
- file: The file to upload.
68
- config: Configuration options for processing. Optional.
69
-
70
- Examples:
71
- ```
72
- # Upload from file path
73
- task = chunkr.start_upload("document.pdf")
74
-
75
- # Upload from opened file
76
- with open("document.pdf", "rb") as f:
77
- task = chunkr.start_upload(f)
78
-
79
- # Upload from URL
80
- task = chunkr.start_upload("https://example.com/document.pdf")
81
-
82
- # Upload from base64 string (must include MIME type header)
83
- task = chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
84
-
85
- # Upload an image
86
- from PIL import Image
87
- img = Image.open("photo.jpg")
88
- task = chunkr.start_upload(img)
89
-
90
- # Wait for the task to complete - this can be done when needed
91
- task.poll()
92
- ```
93
-
94
- Returns:
95
- TaskResponse: The initial task response
96
- """
97
26
  files= prepare_upload_data(file, config)
27
+ if not self._session:
28
+ raise ValueError("Session not found")
98
29
  r = self._session.post(
99
30
  f"{self.url}/api/v1/task",
100
31
  files=files,
@@ -104,16 +35,9 @@ class Chunkr(ChunkrBase):
104
35
  return TaskResponse(**r.json()).with_client(self)
105
36
 
106
37
  def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
- """Update a task by its ID.
108
-
109
- Args:
110
- task_id: The ID of the task to update
111
- config: The new configuration to use
112
-
113
- Returns:
114
- TaskResponse: The updated task response
115
- """
116
38
  files = prepare_upload_data(None, config)
39
+ if not self._session:
40
+ raise ValueError("Session not found")
117
41
  r = self._session.patch(
118
42
  f"{self.url}/api/v1/task/{task_id}",
119
43
  files=files,
@@ -124,14 +48,8 @@ class Chunkr(ChunkrBase):
124
48
  return TaskResponse(**r.json()).with_client(self)
125
49
 
126
50
  def get_task(self, task_id: str) -> TaskResponse:
127
- """Get a task response by its ID.
128
-
129
- Args:
130
- task_id: The ID of the task to get
131
-
132
- Returns:
133
- TaskResponse: The task response
134
- """
51
+ if not self._session:
52
+ raise ValueError("Session not found")
135
53
  r = self._session.get(
136
54
  f"{self.url}/api/v1/task/{task_id}",
137
55
  headers=self._headers()
@@ -141,11 +59,8 @@ class Chunkr(ChunkrBase):
141
59
 
142
60
 
143
61
  def delete_task(self, task_id: str) -> None:
144
- """Delete a task by its ID.
145
-
146
- Args:
147
- task_id: The ID of the task to delete
148
- """
62
+ if not self._session:
63
+ raise ValueError("Session not found")
149
64
  r = self._session.delete(
150
65
  f"{self.url}/api/v1/task/{task_id}",
151
66
  headers=self._headers()
@@ -153,11 +68,8 @@ class Chunkr(ChunkrBase):
153
68
  r.raise_for_status()
154
69
 
155
70
  def cancel_task(self, task_id: str) -> None:
156
- """Cancel a task by its ID.
157
-
158
- Args:
159
- task_id: The ID of the task to cancel
160
- """
71
+ if not self._session:
72
+ raise ValueError("Session not found")
161
73
  r = self._session.get(
162
74
  f"{self.url}/api/v1/task/{task_id}/cancel",
163
75
  headers=self._headers()
@@ -1,11 +1,11 @@
1
1
  from .chunkr_base import ChunkrBase
2
- from .task import TaskResponse
3
2
  from .config import Configuration
3
+ from .misc import prepare_upload_data
4
+ from .task_async import TaskResponseAsync
4
5
  import httpx
5
6
  from pathlib import Path
6
7
  from PIL import Image
7
8
  from typing import Union, BinaryIO
8
- from .misc import prepare_upload_data
9
9
 
10
10
  class ChunkrAsync(ChunkrBase):
11
11
  """Asynchronous Chunkr API client"""
@@ -14,129 +14,99 @@ class ChunkrAsync(ChunkrBase):
14
14
  super().__init__(url, api_key)
15
15
  self._client = httpx.AsyncClient()
16
16
 
17
- async def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
18
- """Upload a file and wait for processing to complete.
19
-
20
- Args:
21
- file: The file to upload.
22
- config: Configuration options for processing. Optional.
23
-
24
- Examples:
25
- ```python
26
- # Upload from file path
27
- await chunkr.upload("document.pdf")
28
-
29
- # Upload from opened file
30
- with open("document.pdf", "rb") as f:
31
- await chunkr.upload(f)
32
-
33
- # Upload from URL
34
- await chunkr.upload("https://example.com/document.pdf")
35
-
36
- # Upload from base64 string (must include MIME type header)
37
- await chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
38
-
39
- # Upload an image
40
- from PIL import Image
41
- img = Image.open("photo.jpg")
42
- await chunkr.upload(img)
43
- ```
44
- Returns:
45
- TaskResponse: The completed task response
46
- """
47
- task = await self.create_task(file, config)
48
- return await task.poll_async()
17
+ async def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponseAsync:
18
+ if not self._client or self._client.is_closed:
19
+ self._client = httpx.AsyncClient()
20
+ try:
21
+ task = await self.create_task(file, config)
22
+ return await task.poll()
23
+ except Exception as e:
24
+ await self._client.aclose()
25
+ raise e
49
26
 
50
- async def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
- """Update a task by its ID and wait for processing to complete.
52
-
53
- Args:
54
- task_id: The ID of the task to update
55
- config: Configuration options for processing. Optional.
56
-
57
- Returns:
58
- TaskResponse: The updated task response
59
- """
60
- task = await self.update_task(task_id, config)
61
- return await task.poll_async()
62
-
63
- async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
64
- """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.
65
-
66
- Args:
67
- file: The file to upload.
68
- config: Configuration options for processing. Optional.
69
-
70
- Examples:
71
- ```
72
- # Upload from file path
73
- task = await chunkr.start_upload("document.pdf")
74
-
75
- # Upload from opened file
76
- with open("document.pdf", "rb") as f:
77
- task = await chunkr.start_upload(f)
78
-
79
- # Upload from URL
80
- task = await chunkr.start_upload("https://example.com/document.pdf")
81
-
82
- # Upload from base64 string (must include MIME type header)
83
- task = await chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
84
-
85
- # Upload an image
86
- from PIL import Image
87
- img = Image.open("photo.jpg")
88
- task = await chunkr.start_upload(img)
89
-
90
- # Wait for the task to complete - this can be done when needed
91
- await task.poll_async()
92
- ```
93
-
94
- Returns:
95
- TaskResponse: The initial task response
96
- """
97
- files = prepare_upload_data(file, config)
98
- r = await self._client.post(
99
- f"{self.url}/api/v1/task",
100
- files=files,
101
- headers=self._headers()
102
- )
103
- r.raise_for_status()
104
- return TaskResponse(**r.json()).with_client(self)
105
-
106
- async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
- files = prepare_upload_data(None, config)
108
- r = await self._client.patch(
109
- f"{self.url}/api/v1/task/{task_id}",
110
- files=files,
111
- headers=self._headers()
112
- )
27
+ async def update(self, task_id: str, config: Configuration) -> TaskResponseAsync:
28
+ if not self._client or self._client.is_closed:
29
+ self._client = httpx.AsyncClient()
30
+ try:
31
+ task = await self.update_task(task_id, config)
32
+ return await task.poll()
33
+ except Exception as e:
34
+ await self._client.aclose()
35
+ raise e
36
+
37
+ async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponseAsync:
38
+ if not self._client or self._client.is_closed:
39
+ self._client = httpx.AsyncClient()
40
+ try:
41
+ files = prepare_upload_data(file, config)
42
+ r = await self._client.post(
43
+ f"{self.url}/api/v1/task",
44
+ files=files,
45
+ headers=self._headers()
46
+ )
47
+ r.raise_for_status()
48
+ return TaskResponseAsync(**r.json()).with_client(self)
49
+ except Exception as e:
50
+ await self._client.aclose()
51
+ raise e
52
+
53
+ async def update_task(self, task_id: str, config: Configuration) -> TaskResponseAsync:
54
+ if not self._client or self._client.is_closed:
55
+ self._client = httpx.AsyncClient()
56
+ try:
57
+ files = prepare_upload_data(None, config)
58
+ r = await self._client.patch(
59
+ f"{self.url}/api/v1/task/{task_id}",
60
+ files=files,
61
+ headers=self._headers()
62
+ )
113
63
 
114
- r.raise_for_status()
115
- return TaskResponse(**r.json()).with_client(self)
64
+ r.raise_for_status()
65
+ return TaskResponseAsync(**r.json()).with_client(self)
66
+ except Exception as e:
67
+ await self._client.aclose()
68
+ raise e
116
69
 
117
- async def get_task(self, task_id: str) -> TaskResponse:
118
- r = await self._client.get(
119
- f"{self.url}/api/v1/task/{task_id}",
120
- headers=self._headers()
121
- )
122
- r.raise_for_status()
123
- return TaskResponse(**r.json()).with_client(self)
70
+ async def get_task(self, task_id: str) -> TaskResponseAsync:
71
+ if not self._client or self._client.is_closed:
72
+ self._client = httpx.AsyncClient()
73
+ try:
74
+ r = await self._client.get(
75
+ f"{self.url}/api/v1/task/{task_id}",
76
+ headers=self._headers()
77
+ )
78
+ r.raise_for_status()
79
+ return TaskResponseAsync(**r.json()).with_client(self)
80
+ except Exception as e:
81
+ await self._client.aclose()
82
+ raise e
124
83
 
125
84
  async def delete_task(self, task_id: str) -> None:
126
- r = await self._client.delete(
127
- f"{self.url}/api/v1/task/{task_id}",
128
- headers=self._headers()
129
- )
130
- r.raise_for_status()
85
+ if not self._client or self._client.is_closed:
86
+ self._client = httpx.AsyncClient()
87
+ try:
88
+ r = await self._client.delete(
89
+ f"{self.url}/api/v1/task/{task_id}",
90
+ headers=self._headers()
91
+ )
92
+ r.raise_for_status()
93
+ except Exception as e:
94
+ await self._client.aclose()
95
+ raise e
131
96
 
132
97
  async def cancel_task(self, task_id: str) -> None:
133
- r = await self._client.get(
134
- f"{self.url}/api/v1/task/{task_id}/cancel",
135
- headers=self._headers()
136
- )
137
- r.raise_for_status()
98
+ if not self._client or self._client.is_closed:
99
+ self._client = httpx.AsyncClient()
100
+ try:
101
+ r = await self._client.get(
102
+ f"{self.url}/api/v1/task/{task_id}/cancel",
103
+ headers=self._headers()
104
+ )
105
+ r.raise_for_status()
106
+ except Exception as e:
107
+ await self._client.aclose()
108
+ raise e
138
109
 
139
-
140
110
  async def __aenter__(self):
141
111
  return self
142
112
 
@@ -1,5 +1,6 @@
1
1
  from .config import Configuration
2
2
  from .task import TaskResponse
3
+ from .task_async import TaskResponseAsync
3
4
  from .auth import HeadersMixin
4
5
  from abc import abstractmethod
5
6
  from dotenv import load_dotenv
@@ -28,42 +29,107 @@ class ChunkrBase(HeadersMixin):
28
29
  self.url = self.url.rstrip("/")
29
30
 
30
31
  @abstractmethod
31
- def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
32
+ def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> Union[TaskResponse, TaskResponseAsync]:
32
33
  """Upload a file and wait for processing to complete.
34
+
35
+ Args:
36
+ file: The file to upload.
37
+ config: Configuration options for processing. Optional.
38
+
39
+ Examples:
40
+ ```python
41
+ # Upload from file path
42
+ await chunkr.upload("document.pdf")
43
+
44
+ # Upload from opened file
45
+ with open("document.pdf", "rb") as f:
46
+ await chunkr.upload(f)
33
47
 
34
- Must be implemented by subclasses.
48
+ # Upload from URL
49
+ await chunkr.upload("https://example.com/document.pdf")
50
+
51
+ # Upload from base64 string (must include MIME type header)
52
+ await chunkr.upload("data:application/pdf;base64,JVBERi0...")
53
+
54
+ # Upload an image
55
+ from PIL import Image
56
+ img = Image.open("photo.jpg")
57
+ await chunkr.upload(img)
58
+ ```
59
+ Returns:
60
+ TaskResponse: The completed task response
35
61
  """
36
62
  pass
37
63
 
38
64
  @abstractmethod
39
- def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
40
- """Update a task by its ID.
65
+ def update(self, task_id: str, config: Configuration) -> Union[TaskResponse, TaskResponseAsync]:
66
+ """Update a task by its ID and wait for processing to complete.
41
67
 
42
- Must be implemented by subclasses.
68
+ Args:
69
+ task_id: The ID of the task to update
70
+ config: Configuration options for processing. Optional.
71
+
72
+ Returns:
73
+ TaskResponse: The updated task response
43
74
  """
44
75
  pass
45
76
 
46
77
  @abstractmethod
47
- def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
48
- """Upload a file for processing and immediately return the task response.
49
-
50
- Must be implemented by subclasses.
78
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> Union[TaskResponse, TaskResponseAsync]:
79
+ """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
80
+
81
+ Args:
82
+ file: The file to upload.
83
+ config: Configuration options for processing. Optional.
84
+
85
+ Examples:
86
+ ```
87
+ # Upload from file path
88
+ task = await chunkr.create_task("document.pdf")
89
+
90
+ # Upload from opened file
91
+ with open("document.pdf", "rb") as f:
92
+ task = await chunkr.create_task(f)
93
+
94
+ # Upload from URL
95
+ task = await chunkr.create_task("https://example.com/document.pdf")
96
+
97
+ # Upload from base64 string (must include MIME type header)
98
+ task = await chunkr.create_task("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
99
+
100
+ # Upload an image
101
+ from PIL import Image
102
+ img = Image.open("photo.jpg")
103
+ task = await chunkr.create_task(img)
104
+
105
+ # Wait for the task to complete - this can be done when needed
106
+ await task.poll()
107
+ ```
51
108
  """
52
109
  pass
53
110
 
54
111
  @abstractmethod
55
- def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
56
- """Update a task by its ID.
112
+ def update_task(self, task_id: str, config: Configuration) -> Union[TaskResponse, TaskResponseAsync]:
113
+ """Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
57
114
 
58
- Must be implemented by subclasses.
115
+ Args:
116
+ task_id: The ID of the task to update
117
+ config: Configuration options for processing. Optional.
118
+
119
+ Returns:
120
+ TaskResponse: The updated task response
59
121
  """
60
122
  pass
61
123
 
62
124
  @abstractmethod
63
- def get_task(self, task_id: str) -> TaskResponse:
125
+ def get_task(self, task_id: str) -> Union[TaskResponse, TaskResponseAsync]:
64
126
  """Get a task response by its ID.
65
127
 
66
- Must be implemented by subclasses.
128
+ Args:
129
+ task_id: The ID of the task to get
130
+
131
+ Returns:
132
+ TaskResponse: The task response
67
133
  """
68
134
  pass
69
135
 
@@ -71,7 +137,8 @@ class ChunkrBase(HeadersMixin):
71
137
  def delete_task(self, task_id: str) -> None:
72
138
  """Delete a task by its ID.
73
139
 
74
- Must be implemented by subclasses.
140
+ Args:
141
+ task_id: The ID of the task to delete
75
142
  """
76
143
  pass
77
144
 
@@ -79,7 +146,8 @@ class ChunkrBase(HeadersMixin):
79
146
  def cancel_task(self, task_id: str) -> None:
80
147
  """Cancel a task by its ID.
81
148
 
82
- Must be implemented by subclasses.
149
+ Args:
150
+ task_id: The ID of the task to cancel
83
151
  """
84
152
  pass
85
153
 
chunkr_ai/api/protocol.py CHANGED
@@ -1,14 +1,14 @@
1
- from typing import runtime_checkable, Protocol
1
+ from typing import Optional, runtime_checkable, Protocol
2
2
  from requests import Session
3
- from httpx import AsyncClient
3
+ from aiohttp import ClientSession
4
4
 
5
5
  @runtime_checkable
6
6
  class ChunkrClientProtocol(Protocol):
7
7
  """Protocol defining the interface for Chunkr clients"""
8
8
  url: str
9
9
  _api_key: str
10
- _session: Session
11
- _client: AsyncClient
10
+ _session: Optional[Session] = None
11
+ _client: Optional[ClientSession] = None
12
12
 
13
13
  def get_api_key(self) -> str:
14
14
  """Get the API key"""
chunkr_ai/api/task.py CHANGED
@@ -7,6 +7,10 @@ class TaskResponse(TaskBase):
7
7
  def _poll_request(self) -> dict:
8
8
  while True:
9
9
  try:
10
+ if not self.task_url:
11
+ raise ValueError("Task URL not found in response")
12
+ if not self._client._session:
13
+ raise ValueError("Client session not found")
10
14
  r = self._client._session.get(self.task_url, headers=self._client._headers())
11
15
  r.raise_for_status()
12
16
  return r.json()
@@ -17,10 +21,8 @@ class TaskResponse(TaskBase):
17
21
  raise
18
22
 
19
23
  def poll(self) -> 'TaskResponse':
20
- if not self.task_url:
21
- raise ValueError("Task URL not found in response")
22
24
  while True:
23
- response = self._poll_request_sync()
25
+ response = self._poll_request()
24
26
  updated_task = TaskResponse(**response).with_client(self._client)
25
27
  self.__dict__.update(updated_task.__dict__)
26
28
  if result := self._check_status():
@@ -30,9 +32,11 @@ class TaskResponse(TaskBase):
30
32
  def update(self, config: Configuration) -> 'TaskResponse':
31
33
  if not self.task_url:
32
34
  raise ValueError("Task URL not found")
35
+ if not self._client._session:
36
+ raise ValueError("Client session not found")
33
37
  files = prepare_upload_data(None, config)
34
38
  r = self._client._session.patch(
35
- f"{self.task_url}",
39
+ self.task_url,
36
40
  files=files,
37
41
  headers=self._client._headers()
38
42
  )
@@ -44,6 +48,8 @@ class TaskResponse(TaskBase):
44
48
  def cancel(self):
45
49
  if not self.task_url:
46
50
  raise ValueError("Task URL not found")
51
+ if not self._client._session:
52
+ raise ValueError("Client session not found")
47
53
  r = self._client._session.get(
48
54
  f"{self.task_url}/cancel",
49
55
  headers=self._client._headers()
@@ -54,6 +60,8 @@ class TaskResponse(TaskBase):
54
60
  def delete(self):
55
61
  if not self.task_url:
56
62
  raise ValueError("Task URL not found")
63
+ if not self._client._session:
64
+ raise ValueError("Client session not found")
57
65
  r = self._client._session.delete(
58
66
  self.task_url,
59
67
  headers=self._client._headers()
@@ -6,6 +6,8 @@ import asyncio
6
6
  class TaskResponseAsync(TaskBase):
7
7
  async def _poll_request(self) -> dict:
8
8
  try:
9
+ if not self._client._client:
10
+ raise ValueError("Client not found")
9
11
  r = await self._client._client.get(self.task_url, headers=self._client._headers())
10
12
  r.raise_for_status()
11
13
  return r.json()
@@ -18,6 +20,8 @@ class TaskResponseAsync(TaskBase):
18
20
  async def poll(self) -> 'TaskResponseAsync':
19
21
  if not self.task_url:
20
22
  raise ValueError("Task URL not found")
23
+ if not self._client._client:
24
+ raise ValueError("Client not found")
21
25
  while True:
22
26
  j = await self._poll_request()
23
27
  updated = TaskResponseAsync(**j).with_client(self._client)
@@ -29,6 +33,8 @@ class TaskResponseAsync(TaskBase):
29
33
  async def update(self, config: Configuration) -> 'TaskResponseAsync':
30
34
  if not self.task_url:
31
35
  raise ValueError("Task URL not found")
36
+ if not self._client._client:
37
+ raise ValueError("Client not found")
32
38
  f = prepare_upload_data(None, config)
33
39
  r = await self._client._client.patch(self.task_url, files=f, headers=self._client._headers())
34
40
  r.raise_for_status()
@@ -39,6 +45,8 @@ class TaskResponseAsync(TaskBase):
39
45
  async def cancel(self):
40
46
  if not self.task_url:
41
47
  raise ValueError("Task URL not found")
48
+ if not self._client._client:
49
+ raise ValueError("Client not found")
42
50
  r = await self._client._client.get(f"{self.task_url}/cancel", headers=self._client._headers())
43
51
  r.raise_for_status()
44
52
  return await self.poll()
@@ -46,5 +54,7 @@ class TaskResponseAsync(TaskBase):
46
54
  async def delete(self):
47
55
  if not self.task_url:
48
56
  raise ValueError("Task URL not found")
57
+ if not self._client._client:
58
+ raise ValueError("Client not found")
49
59
  r = await self._client._client.delete(self.task_url, headers=self._client._headers())
50
- r.raise_for_status()
60
+ r.raise_for_status()
@@ -1,8 +1,7 @@
1
- from .config import Configuration
1
+ from .config import Configuration, Status, OutputResponse
2
2
  from .protocol import ChunkrClientProtocol
3
- from ..models import Status, OutputResponse
4
3
  from abc import ABC, abstractmethod
5
- from typing import TypeVar, Optional, Generic, Union
4
+ from typing import TypeVar, Optional, Generic
6
5
  from pydantic import BaseModel, PrivateAttr
7
6
  from datetime import datetime
8
7
 
@@ -23,7 +22,7 @@ class TaskBase(BaseModel, ABC, Generic[T]):
23
22
  status: Status
24
23
  task_id: str
25
24
  task_url: Optional[str]
26
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
25
+ _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
27
26
 
28
27
  @abstractmethod
29
28
  def _poll_request(self) -> dict:
@@ -50,7 +49,7 @@ class TaskBase(BaseModel, ABC, Generic[T]):
50
49
  """Delete the task."""
51
50
  pass
52
51
 
53
- def with_client(self, client: Union[ChunkrClientProtocol]) -> T:
52
+ def with_client(self, client: ChunkrClientProtocol) -> T:
54
53
  self._client = client
55
54
  return self
56
55
 
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.12
3
+ Version: 0.0.14
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
- Requires-Dist: httpx>=0.25.0
11
10
  Requires-Dist: pillow>=10.0.0
12
11
  Requires-Dist: pydantic>=2.0.0
13
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -81,7 +80,7 @@ async def process_document():
81
80
  # If you want to upload without waiting for processing
82
81
  task = await chunkr.start_upload("document.pdf")
83
82
  # ... do other things ...
84
- await task.poll_async() # Check status when needed
83
+ await task.poll() # Check status when needed
85
84
  ```
86
85
 
87
86
  ### Additional Features
@@ -0,0 +1,21 @@
1
+ chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
+ chunkr_ai/models.py,sha256=-dbwtTHTcGhH3LXUdVUPkobbPoeFNXRizeAW8BCGSkE,903
3
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
6
+ chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
7
+ chunkr_ai/api/chunkr.py,sha256=A1KEjA4zRJkGZhYwhWde4CFncrljEMky4SO4LPzYvws,2652
8
+ chunkr_ai/api/chunkr_async.py,sha256=OvXd9Ma3rsp6q8nJsWzNgTKAGk-py93DqAENE8fMQfc,4153
9
+ chunkr_ai/api/chunkr_base.py,sha256=run4UJVKa7Gx8I_ME0Mol-c_b-NIcexNkgxHu_hbX5M,4996
10
+ chunkr_ai/api/config.py,sha256=joTn7jiOlJXTwwza-jHauLV-39CMzaxZVGB9JBm8Cok,4862
11
+ chunkr_ai/api/misc.py,sha256=9vnfrbJ7sFlZqwEIQ4NTMb5rhPOmETT7e1jR-b42PXM,4977
12
+ chunkr_ai/api/protocol.py,sha256=li-zy7Z-ChR9kZqJlixQv1kUYrmesPHxwUtnE5p16tQ,529
13
+ chunkr_ai/api/schema.py,sha256=OeLOhBRXeRBgEImg0Q6O9Z10ojT6aSEVvwnDR8UeENo,4971
14
+ chunkr_ai/api/task.py,sha256=j-Odecnbj3NjAGDyUNmZHgqsRDYhHs3xBWgsT2rrHjs,2517
15
+ chunkr_ai/api/task_async.py,sha256=HpuVW928s-V3jjPa8L5i86lvKcpKbMhkCuV1nKSyXVA,2437
16
+ chunkr_ai/api/task_base.py,sha256=9S8UCsrEOAH48PmOpLlyAifKDrtaUtrBMUmMZq4Dceg,2328
17
+ chunkr_ai-0.0.14.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ chunkr_ai-0.0.14.dist-info/METADATA,sha256=1reEDprWgXg6bKCGKv6LXJxZZR-dQeJn9DeXGbp4Iwk,4839
19
+ chunkr_ai-0.0.14.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
20
+ chunkr_ai-0.0.14.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
21
+ chunkr_ai-0.0.14.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
- chunkr_ai/models.py,sha256=-dbwtTHTcGhH3LXUdVUPkobbPoeFNXRizeAW8BCGSkE,903
3
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
5
- chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
6
- chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
7
- chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
8
- chunkr_ai/api/config.py,sha256=joTn7jiOlJXTwwza-jHauLV-39CMzaxZVGB9JBm8Cok,4862
9
- chunkr_ai/api/misc.py,sha256=9vnfrbJ7sFlZqwEIQ4NTMb5rhPOmETT7e1jR-b42PXM,4977
10
- chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
11
- chunkr_ai/api/schema.py,sha256=OeLOhBRXeRBgEImg0Q6O9Z10ojT6aSEVvwnDR8UeENo,4971
12
- chunkr_ai/api/task.py,sha256=4insrdGEVzBHs1ejZvde8bbEetVzgJELa47UjhfBqCA,2116
13
- chunkr_ai/api/task_async.py,sha256=LqS-LL-mCOgfGsgvuSXhKkSEUM6MMro-EZHl_ZedQQk,1998
14
- chunkr_ai/api/task_base.py,sha256=iS5UVIDEPIiDoWrn21Oh_dQurkd_hvKQ8ng32j6sGoA,2369
15
- chunkr_ai-0.0.12.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- chunkr_ai-0.0.12.dist-info/METADATA,sha256=dfo9myRizW2A5W0H6FpIoBzHa4QxmEe3lsedPYhwjXM,4874
17
- chunkr_ai-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
- chunkr_ai-0.0.12.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
- chunkr_ai-0.0.12.dist-info/RECORD,,