chunkr-ai 0.0.12__tar.gz → 0.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {chunkr_ai-0.0.12/src/chunkr_ai.egg-info → chunkr_ai-0.0.14}/PKG-INFO +2 -3
  2. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/README.md +1 -1
  3. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/pyproject.toml +1 -2
  4. chunkr_ai-0.0.14/src/chunkr_ai/api/api.py +0 -0
  5. chunkr_ai-0.0.14/src/chunkr_ai/api/base.py +173 -0
  6. chunkr_ai-0.0.14/src/chunkr_ai/api/chunkr.py +77 -0
  7. chunkr_ai-0.0.14/src/chunkr_ai/api/chunkr_async.py +114 -0
  8. chunkr_ai-0.0.12/src/chunkr_ai/api/chunkr.py → chunkr_ai-0.0.14/src/chunkr_ai/api/chunkr_base.py +63 -75
  9. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/protocol.py +4 -4
  10. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/task.py +12 -4
  11. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/task_async.py +11 -1
  12. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/task_base.py +4 -5
  13. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14/src/chunkr_ai.egg-info}/PKG-INFO +2 -3
  14. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai.egg-info/SOURCES.txt +2 -0
  15. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai.egg-info/requires.txt +0 -1
  16. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/tests/test_chunkr.py +15 -25
  17. chunkr_ai-0.0.12/src/chunkr_ai/api/chunkr_async.py +0 -144
  18. chunkr_ai-0.0.12/src/chunkr_ai/api/chunkr_base.py +0 -85
  19. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/LICENSE +0 -0
  20. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/setup.cfg +0 -0
  21. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/__init__.py +0 -0
  22. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/__init__.py +0 -0
  23. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/auth.py +0 -0
  24. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/config.py +0 -0
  25. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/misc.py +0 -0
  26. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/api/schema.py +0 -0
  27. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai/models.py +0 -0
  28. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  29. {chunkr_ai-0.0.12 → chunkr_ai-0.0.14}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.12
3
+ Version: 0.0.14
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
- Requires-Dist: httpx>=0.25.0
11
10
  Requires-Dist: pillow>=10.0.0
12
11
  Requires-Dist: pydantic>=2.0.0
13
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -81,7 +80,7 @@ async def process_document():
81
80
  # If you want to upload without waiting for processing
82
81
  task = await chunkr.create_task("document.pdf")
83
82
  # ... do other things ...
84
- await task.poll_async() # Check status when needed
83
+ await task.poll() # Check status when needed
85
84
  ```
86
85
 
87
86
  ### Additional Features
@@ -62,7 +62,7 @@ async def process_document():
62
62
  # If you want to upload without waiting for processing
63
63
  task = await chunkr.create_task("document.pdf")
64
64
  # ... do other things ...
65
- await task.poll_async() # Check status when needed
65
+ await task.poll() # Check status when needed
66
66
  ```
67
67
 
68
68
  ### Additional Features
@@ -4,14 +4,13 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.12"
7
+ version = "0.0.14"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
11
11
  license = {"file" = "LICENSE"}
12
12
  urls = {Homepage = "https://chunkr.ai"}
13
13
  dependencies = [
14
- "httpx>=0.25.0",
15
14
  "httpx>=0.25.0",
16
15
  "pillow>=10.0.0",
17
16
  "pydantic>=2.0.0",
File without changes
@@ -0,0 +1,173 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import io
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from PIL import Image
11
+ import requests
12
+ from typing import BinaryIO, Tuple, Union
13
+
14
class ChunkrBase(HeadersMixin):
    """Base class with shared functionality for Chunkr API clients.

    Resolves the API endpoint and key (explicit arguments first, then the
    environment, which ``load_dotenv()`` populates from a ``.env`` file) and
    provides helpers that turn the supported upload inputs into the
    ``files`` / ``data`` dictionaries used for a multipart request.

    NOTE(review): the ``@abstractmethod`` markers below are only enforced at
    instantiation time when the class's metaclass is ``ABCMeta``; whether
    ``HeadersMixin`` provides that is not visible here -- confirm.
    """

    # Seconds passed to ``requests.get`` when downloading a URL input, so a
    # stalled server cannot block the caller forever. Override on a subclass
    # or instance if slower sources must be supported.
    DOWNLOAD_TIMEOUT = 60

    # MIME types accepted in a data-URI header, mapped to the file extension
    # used for the generated upload filename.
    _MIME_TO_EXT = {
        'application/pdf': 'pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
        'application/vnd.ms-powerpoint': 'ppt',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
        'application/vnd.ms-excel': 'xls',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/jpg': 'jpg',
    }

    def __init__(self, url: str = None, api_key: str = None):
        """Resolve the API URL and key.

        Args:
            url: Base URL of the API. Falls back to the ``CHUNKR_URL``
                environment variable, then to ``https://api.chunkr.ai``.
            api_key: API key. Falls back to the ``CHUNKR_API_KEY``
                environment variable.

        Raises:
            ValueError: If no API key can be resolved.
        """
        load_dotenv()
        self.url = (
            url or
            os.getenv('CHUNKR_URL') or
            'https://api.chunkr.ai'
        )
        self._api_key = (
            api_key or
            os.getenv('CHUNKR_API_KEY')
        )
        if not self._api_key:
            raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")

        # Normalise so endpoint paths can be appended with a single "/".
        self.url = self.url.rstrip("/")

    def _prepare_file(
        self,
        file: Union[str, Path, BinaryIO, Image.Image]
    ) -> Tuple[str, BinaryIO]:
        """Convert various file types into a tuple of (filename, file-like object).

        Args:
            file: Input file, can be:
                - String or Path to a file
                - URL string starting with http:// or https://
                - Base64 data URI (must contain ``;base64,``)
                - Opened binary file (mode='rb')
                - PIL/Pillow Image object

        Returns:
            Tuple[str, BinaryIO]: (filename, file-like object) ready for upload.
            For a path input the returned handle is freshly opened and the
            caller is responsible for closing it.

        Raises:
            FileNotFoundError: If the file path doesn't exist
            TypeError: If the file type is not supported
            ValueError: If the base64 payload cannot be decoded
            ValueError: If the MIME type is unsupported
            requests.RequestException: If the URL cannot be downloaded
                (connection failure, timeout or non-2xx status)
        """
        # Handle URLs
        if isinstance(file, str) and file.startswith(('http://', 'https://')):
            from urllib.parse import urlparse

            response = requests.get(file, timeout=self.DOWNLOAD_TIMEOUT)
            response.raise_for_status()
            # Use only the path component so query strings / fragments
            # (e.g. signed-URL tokens) do not end up in the filename.
            filename = urlparse(file).path.rsplit('/', 1)[-1] or 'downloaded_file'
            return filename, io.BytesIO(response.content)

        # Handle base64 data URIs ("data:<mime>;base64,<payload>")
        if isinstance(file, str) and ';base64,' in file:
            import base64

            # Split header and data
            header, base64_data = file.split(',', 1)
            try:
                # binascii.Error (bad padding) and the non-ASCII input error
                # are both ValueError subclasses; nothing broader is caught so
                # unrelated failures are not mislabelled.
                file_bytes = base64.b64decode(base64_data)
            except ValueError as e:
                raise ValueError(f"Invalid base64 string: {str(e)}") from e

            # Determine the extension from the MIME type in the header. This
            # check sits outside the try block so its error message is not
            # rewritten as an "Invalid base64 string" error.
            mime_type = header.split(':')[-1].split(';')[0].lower()
            extension = self._MIME_TO_EXT.get(mime_type)
            if extension is None:
                raise ValueError(f"Unsupported MIME type: {mime_type}")

            return f"file.{extension}", io.BytesIO(file_bytes)

        # Handle file paths
        if isinstance(file, (str, Path)):
            path = Path(file).resolve()
            if not path.exists():
                raise FileNotFoundError(f"File not found: {file}")
            return path.name, open(path, 'rb')

        # Handle PIL Images
        if isinstance(file, Image.Image):
            buffer = io.BytesIO()
            image_format = file.format or 'PNG'
            file.save(buffer, format=image_format)
            buffer.seek(0)  # rewind so the upload reads from the start
            return f"image.{image_format.lower()}", buffer

        # Handle file-like objects
        if hasattr(file, 'read') and hasattr(file, 'seek'):
            # Try to get the filename from the file object if possible.
            # ``name`` may be missing, None, or an int file descriptor, none
            # of which Path() accepts, so fall back to a generic name.
            name = getattr(file, 'name', 'document')
            if not isinstance(name, (str, os.PathLike)):
                name = 'document'
            return Path(name).name, file

        raise TypeError(f"Unsupported file type: {type(file)}")

    def _prepare_upload_data(
        self,
        file: Union[str, Path, BinaryIO, Image.Image],
        config: Configuration = None
    ) -> Tuple[dict, dict]:
        """Prepare files and data dictionaries for upload.

        Args:
            file: The file to upload
            config: Optional configuration settings

        Returns:
            Tuple[dict, dict]: (files dict, data dict) ready for upload.
            Dict-valued configuration entries are JSON-encoded and added to
            the files dict as ``application/json`` parts; all other entries
            go into the data dict unchanged.
        """
        filename, file_obj = self._prepare_file(file)
        files = {"file": (filename, file_obj)}
        data = {}

        if config:
            config_dict = config.model_dump(mode="json", exclude_none=True)
            for key, value in config_dict.items():
                if isinstance(value, dict):
                    files[key] = (None, json.dumps(value), 'application/json')
                else:
                    data[key] = value

        return files, data

    @abstractmethod
    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Must be implemented by subclasses.
        """
        pass
@@ -0,0 +1,77 @@
1
+ from .chunkr_base import ChunkrBase
2
+ from .config import Configuration
3
+ from .task import TaskResponse
4
+ from pathlib import Path
5
+ from PIL import Image
6
+ import requests
7
+ from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
9
+
10
class Chunkr(ChunkrBase):
    """Chunkr API client

    Synchronous client built on a single ``requests.Session``. It can be
    used as a context manager (``with Chunkr() as chunkr: ...``) so the
    session is released when the block exits, or closed explicitly with
    ``close()``.
    """

    def __init__(self, url: str = None, api_key: str = None):
        """Create the client and open its HTTP session.

        Args:
            url: Base URL of the API, forwarded to ``ChunkrBase``.
            api_key: API key, forwarded to ``ChunkrBase``.
        """
        super().__init__(url, api_key)
        self._session = requests.Session()

    def __enter__(self) -> 'Chunkr':
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()

    def close(self) -> None:
        """Close the HTTP session; safe to call more than once.

        After closing, request methods raise ``ValueError("Session not found")``.
        """
        if self._session:
            self._session.close()
            self._session = None

    def _require_session(self) -> requests.Session:
        """Return the active session or raise ``ValueError`` if there is none."""
        if not self._session:
            raise ValueError("Session not found")
        return self._session

    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Create a task for ``file`` and return the result of ``task.poll()``."""
        task = self.create_task(file, config)
        return task.poll()

    def update(self, task_id: str, config: Configuration) -> TaskResponse:
        """Update task ``task_id`` with ``config`` and return the result of ``task.poll()``."""
        task = self.update_task(task_id, config)
        return task.poll()

    def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """POST ``file`` (and optional ``config``) to ``/api/v1/task``.

        Returns:
            TaskResponse: Built from the JSON response and bound to this
            client; it is returned without polling.

        Raises:
            ValueError: If the session is missing.
            requests.HTTPError: If the server returns an error status.
        """
        files = prepare_upload_data(file, config)
        r = self._require_session().post(
            f"{self.url}/api/v1/task",
            files=files,
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
        """PATCH ``/api/v1/task/{task_id}`` with ``config``.

        Returns:
            TaskResponse: Built from the JSON response and bound to this
            client; it is returned without polling.

        Raises:
            ValueError: If the session is missing.
            requests.HTTPError: If the server returns an error status.
        """
        files = prepare_upload_data(None, config)
        r = self._require_session().patch(
            f"{self.url}/api/v1/task/{task_id}",
            files=files,
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    def get_task(self, task_id: str) -> TaskResponse:
        """GET ``/api/v1/task/{task_id}`` and return it as a ``TaskResponse``.

        Raises:
            ValueError: If the session is missing.
            requests.HTTPError: If the server returns an error status.
        """
        r = self._require_session().get(
            f"{self.url}/api/v1/task/{task_id}",
            headers=self._headers()
        )
        r.raise_for_status()
        return TaskResponse(**r.json()).with_client(self)

    def delete_task(self, task_id: str) -> None:
        """Send DELETE to ``/api/v1/task/{task_id}``.

        Raises:
            ValueError: If the session is missing.
            requests.HTTPError: If the server returns an error status.
        """
        r = self._require_session().delete(
            f"{self.url}/api/v1/task/{task_id}",
            headers=self._headers()
        )
        r.raise_for_status()

    def cancel_task(self, task_id: str) -> None:
        """Request cancellation via GET ``/api/v1/task/{task_id}/cancel``.

        Raises:
            ValueError: If the session is missing.
            requests.HTTPError: If the server returns an error status.
        """
        r = self._require_session().get(
            f"{self.url}/api/v1/task/{task_id}/cancel",
            headers=self._headers()
        )
        r.raise_for_status()
@@ -0,0 +1,114 @@
1
+ from .chunkr_base import ChunkrBase
2
+ from .config import Configuration
3
+ from .misc import prepare_upload_data
4
+ from .task_async import TaskResponseAsync
5
+ import httpx
6
+ from pathlib import Path
7
+ from PIL import Image
8
+ from typing import Union, BinaryIO
9
+
10
class ChunkrAsync(ChunkrBase):
    """Asynchronous Chunkr API client

    Built on a single ``httpx.AsyncClient`` that is recreated on the next
    call if it is missing or has been closed. Every request method closes
    the client when an exception escapes and then re-raises it.

    NOTE(review): because the client is shared, an error in one call also
    closes it for any other coroutine currently using this instance --
    confirm this is intended before running calls concurrently.
    """

    def __init__(self, url: str = None, api_key: str = None):
        """Create the client and its underlying ``httpx.AsyncClient``.

        Args:
            url: Base URL of the API, forwarded to ``ChunkrBase``.
            api_key: API key, forwarded to ``ChunkrBase``.
        """
        super().__init__(url, api_key)
        self._client = httpx.AsyncClient()

    def _ensure_client(self) -> httpx.AsyncClient:
        """Return a usable HTTP client, creating a new one if it is missing or closed."""
        if not self._client or self._client.is_closed:
            self._client = httpx.AsyncClient()
        return self._client

    async def close(self) -> None:
        """Close the underlying HTTP client; safe to call more than once."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    async def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponseAsync:
        """Create a task for ``file`` and return the result of ``await task.poll()``."""
        self._ensure_client()
        try:
            task = await self.create_task(file, config)
            return await task.poll()
        except Exception:
            await self._client.aclose()
            raise

    async def update(self, task_id: str, config: Configuration) -> TaskResponseAsync:
        """Update task ``task_id`` with ``config`` and return the result of ``await task.poll()``."""
        self._ensure_client()
        try:
            task = await self.update_task(task_id, config)
            return await task.poll()
        except Exception:
            await self._client.aclose()
            raise

    async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponseAsync:
        """POST ``file`` (and optional ``config``) to ``/api/v1/task``.

        Returns:
            TaskResponseAsync: Built from the JSON response and bound to
            this client; it is returned without polling.

        Raises:
            httpx.HTTPStatusError: If the server returns an error status.
        """
        client = self._ensure_client()
        try:
            files = prepare_upload_data(file, config)
            r = await client.post(
                f"{self.url}/api/v1/task",
                files=files,
                headers=self._headers()
            )
            r.raise_for_status()
            return TaskResponseAsync(**r.json()).with_client(self)
        except Exception:
            await self._client.aclose()
            raise

    async def update_task(self, task_id: str, config: Configuration) -> TaskResponseAsync:
        """PATCH ``/api/v1/task/{task_id}`` with ``config``.

        Returns:
            TaskResponseAsync: Built from the JSON response and bound to
            this client; it is returned without polling.

        Raises:
            httpx.HTTPStatusError: If the server returns an error status.
        """
        client = self._ensure_client()
        try:
            files = prepare_upload_data(None, config)
            r = await client.patch(
                f"{self.url}/api/v1/task/{task_id}",
                files=files,
                headers=self._headers()
            )
            r.raise_for_status()
            return TaskResponseAsync(**r.json()).with_client(self)
        except Exception:
            await self._client.aclose()
            raise

    async def get_task(self, task_id: str) -> TaskResponseAsync:
        """GET ``/api/v1/task/{task_id}`` and return it as a ``TaskResponseAsync``.

        Raises:
            httpx.HTTPStatusError: If the server returns an error status.
        """
        client = self._ensure_client()
        try:
            r = await client.get(
                f"{self.url}/api/v1/task/{task_id}",
                headers=self._headers()
            )
            r.raise_for_status()
            return TaskResponseAsync(**r.json()).with_client(self)
        except Exception:
            await self._client.aclose()
            raise

    async def delete_task(self, task_id: str) -> None:
        """Send DELETE to ``/api/v1/task/{task_id}``.

        Raises:
            httpx.HTTPStatusError: If the server returns an error status.
        """
        client = self._ensure_client()
        try:
            r = await client.delete(
                f"{self.url}/api/v1/task/{task_id}",
                headers=self._headers()
            )
            r.raise_for_status()
        except Exception:
            await self._client.aclose()
            raise

    async def cancel_task(self, task_id: str) -> None:
        """Request cancellation via GET ``/api/v1/task/{task_id}/cancel``.

        Raises:
            httpx.HTTPStatusError: If the server returns an error status.
        """
        client = self._ensure_client()
        try:
            r = await client.get(
                f"{self.url}/api/v1/task/{task_id}/cancel",
                headers=self._headers()
            )
            r.raise_for_status()
        except Exception:
            await self._client.aclose()
            raise

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()
@@ -1,20 +1,35 @@
1
- from .chunkr_base import ChunkrBase
2
1
  from .config import Configuration
3
2
  from .task import TaskResponse
3
+ from .task_async import TaskResponseAsync
4
+ from .auth import HeadersMixin
5
+ from abc import abstractmethod
6
+ from dotenv import load_dotenv
7
+ import os
4
8
  from pathlib import Path
5
9
  from PIL import Image
6
- import requests
7
- from typing import Union, BinaryIO
8
- from .misc import prepare_upload_data
10
+ from typing import BinaryIO, Union
9
11
 
10
- class Chunkr(ChunkrBase):
11
- """Chunkr API client"""
12
+ class ChunkrBase(HeadersMixin):
13
+ """Base class with shared functionality for Chunkr API clients."""
12
14
 
13
15
  def __init__(self, url: str = None, api_key: str = None):
14
- super().__init__(url, api_key)
15
- self._session = requests.Session()
16
+ load_dotenv()
17
+ self.url = (
18
+ url or
19
+ os.getenv('CHUNKR_URL') or
20
+ 'https://api.chunkr.ai'
21
+ )
22
+ self._api_key = (
23
+ api_key or
24
+ os.getenv('CHUNKR_API_KEY')
25
+ )
26
+ if not self._api_key:
27
+ raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")
28
+
29
+ self.url = self.url.rstrip("/")
16
30
 
17
- def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
31
+ @abstractmethod
32
+ def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> Union[TaskResponse, TaskResponseAsync]:
18
33
  """Upload a file and wait for processing to complete.
19
34
 
20
35
  Args:
@@ -22,32 +37,32 @@ class Chunkr(ChunkrBase):
22
37
  config: Configuration options for processing. Optional.
23
38
 
24
39
  Examples:
25
- ```
40
+ ```python
26
41
  # Upload from file path
27
- chunkr.upload("document.pdf")
42
+ await chunkr.upload("document.pdf")
28
43
 
44
+ # Upload from opened file
45
+ with open("document.pdf", "rb") as f:
46
+ await chunkr.upload(f)
47
+
29
48
  # Upload from URL
30
- chunkr.upload("https://example.com/document.pdf")
49
+ await chunkr.upload("https://example.com/document.pdf")
31
50
 
32
51
  # Upload from base64 string (must include MIME type header)
33
- chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
34
-
35
- # Upload from opened file
36
- with open("document.pdf", "rb") as f:
37
- chunkr.upload(f)
52
+ await chunkr.upload("data:application/pdf;base64,JVBERi0...")
38
53
 
39
54
  # Upload an image
40
55
  from PIL import Image
41
56
  img = Image.open("photo.jpg")
42
- chunkr.upload(img)
57
+ await chunkr.upload(img)
43
58
  ```
44
59
  Returns:
45
60
  TaskResponse: The completed task response
46
61
  """
47
- task = self.create_task(file, config)
48
- return task.poll()
62
+ pass
49
63
 
50
- def update(self, task_id: str, config: Configuration) -> TaskResponse:
64
+ @abstractmethod
65
+ def update(self, task_id: str, config: Configuration) -> Union[TaskResponse, TaskResponseAsync]:
51
66
  """Update a task by its ID and wait for processing to complete.
52
67
 
53
68
  Args:
@@ -57,11 +72,11 @@ class Chunkr(ChunkrBase):
57
72
  Returns:
58
73
  TaskResponse: The updated task response
59
74
  """
60
- task = self.update_task(task_id, config)
61
- return task.poll()
75
+ pass
62
76
 
63
- def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
64
- """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`
77
+ @abstractmethod
78
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> Union[TaskResponse, TaskResponseAsync]:
79
+ """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
65
80
 
66
81
  Args:
67
82
  file: The file to upload.
@@ -70,60 +85,44 @@ class Chunkr(ChunkrBase):
70
85
  Examples:
71
86
  ```
72
87
  # Upload from file path
73
- task = chunkr.start_upload("document.pdf")
88
+ task = await chunkr.create_task("document.pdf")
74
89
 
75
90
  # Upload from opened file
76
91
  with open("document.pdf", "rb") as f:
77
- task = chunkr.start_upload(f)
78
-
92
+ task = await chunkr.create_task(f)
93
+
79
94
  # Upload from URL
80
- task = chunkr.start_upload("https://example.com/document.pdf")
95
+ task = await chunkr.create_task("https://example.com/document.pdf")
81
96
 
82
97
  # Upload from base64 string (must include MIME type header)
83
- task = chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
98
+ task = await chunkr.create_task("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
84
99
 
85
100
  # Upload an image
86
101
  from PIL import Image
87
102
  img = Image.open("photo.jpg")
88
- task = chunkr.start_upload(img)
103
+ task = await chunkr.create_task(img)
89
104
 
90
105
  # Wait for the task to complete - this can be done when needed
91
- task.poll()
106
+ await task.poll()
92
107
  ```
93
-
94
- Returns:
95
- TaskResponse: The initial task response
96
108
  """
97
- files= prepare_upload_data(file, config)
98
- r = self._session.post(
99
- f"{self.url}/api/v1/task",
100
- files=files,
101
- headers=self._headers()
102
- )
103
- r.raise_for_status()
104
- return TaskResponse(**r.json()).with_client(self)
105
-
106
- def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
- """Update a task by its ID.
109
+ pass
110
+
111
+ @abstractmethod
112
+ def update_task(self, task_id: str, config: Configuration) -> Union[TaskResponse, TaskResponseAsync]:
113
+ """Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
108
114
 
109
115
  Args:
110
116
  task_id: The ID of the task to update
111
- config: The new configuration to use
117
+ config: Configuration options for processing. Optional.
112
118
 
113
119
  Returns:
114
120
  TaskResponse: The updated task response
115
121
  """
116
- files = prepare_upload_data(None, config)
117
- r = self._session.patch(
118
- f"{self.url}/api/v1/task/{task_id}",
119
- files=files,
120
- headers=self._headers()
121
- )
122
-
123
- r.raise_for_status()
124
- return TaskResponse(**r.json()).with_client(self)
122
+ pass
125
123
 
126
- def get_task(self, task_id: str) -> TaskResponse:
124
+ @abstractmethod
125
+ def get_task(self, task_id: str) -> Union[TaskResponse, TaskResponseAsync]:
127
126
  """Get a task response by its ID.
128
127
 
129
128
  Args:
@@ -132,34 +131,23 @@ class Chunkr(ChunkrBase):
132
131
  Returns:
133
132
  TaskResponse: The task response
134
133
  """
135
- r = self._session.get(
136
- f"{self.url}/api/v1/task/{task_id}",
137
- headers=self._headers()
138
- )
139
- r.raise_for_status()
140
- return TaskResponse(**r.json()).with_client(self)
141
-
134
+ pass
142
135
 
136
+ @abstractmethod
143
137
  def delete_task(self, task_id: str) -> None:
144
138
  """Delete a task by its ID.
145
139
 
146
140
  Args:
147
141
  task_id: The ID of the task to delete
148
142
  """
149
- r = self._session.delete(
150
- f"{self.url}/api/v1/task/{task_id}",
151
- headers=self._headers()
152
- )
153
- r.raise_for_status()
154
-
143
+ pass
144
+
145
+ @abstractmethod
155
146
  def cancel_task(self, task_id: str) -> None:
156
147
  """Cancel a task by its ID.
157
148
 
158
149
  Args:
159
150
  task_id: The ID of the task to cancel
160
151
  """
161
- r = self._session.get(
162
- f"{self.url}/api/v1/task/{task_id}/cancel",
163
- headers=self._headers()
164
- )
165
- r.raise_for_status()
152
+ pass
153
+
@@ -1,14 +1,14 @@
1
- from typing import runtime_checkable, Protocol
1
+ from typing import Optional, runtime_checkable, Protocol
2
2
  from requests import Session
3
- from httpx import AsyncClient
3
+ from aiohttp import ClientSession
4
4
 
5
5
  @runtime_checkable
6
6
  class ChunkrClientProtocol(Protocol):
7
7
  """Protocol defining the interface for Chunkr clients"""
8
8
  url: str
9
9
  _api_key: str
10
- _session: Session
11
- _client: AsyncClient
10
+ _session: Optional[Session] = None
11
+ _client: Optional[ClientSession] = None
12
12
 
13
13
  def get_api_key(self) -> str:
14
14
  """Get the API key"""
@@ -7,6 +7,10 @@ class TaskResponse(TaskBase):
7
7
  def _poll_request(self) -> dict:
8
8
  while True:
9
9
  try:
10
+ if not self.task_url:
11
+ raise ValueError("Task URL not found in response")
12
+ if not self._client._session:
13
+ raise ValueError("Client session not found")
10
14
  r = self._client._session.get(self.task_url, headers=self._client._headers())
11
15
  r.raise_for_status()
12
16
  return r.json()
@@ -17,10 +21,8 @@ class TaskResponse(TaskBase):
17
21
  raise
18
22
 
19
23
  def poll(self) -> 'TaskResponse':
20
- if not self.task_url:
21
- raise ValueError("Task URL not found in response")
22
24
  while True:
23
- response = self._poll_request_sync()
25
+ response = self._poll_request()
24
26
  updated_task = TaskResponse(**response).with_client(self._client)
25
27
  self.__dict__.update(updated_task.__dict__)
26
28
  if result := self._check_status():
@@ -30,9 +32,11 @@ class TaskResponse(TaskBase):
30
32
  def update(self, config: Configuration) -> 'TaskResponse':
31
33
  if not self.task_url:
32
34
  raise ValueError("Task URL not found")
35
+ if not self._client._session:
36
+ raise ValueError("Client session not found")
33
37
  files = prepare_upload_data(None, config)
34
38
  r = self._client._session.patch(
35
- f"{self.task_url}",
39
+ self.task_url,
36
40
  files=files,
37
41
  headers=self._client._headers()
38
42
  )
@@ -44,6 +48,8 @@ class TaskResponse(TaskBase):
44
48
  def cancel(self):
45
49
  if not self.task_url:
46
50
  raise ValueError("Task URL not found")
51
+ if not self._client._session:
52
+ raise ValueError("Client session not found")
47
53
  r = self._client._session.get(
48
54
  f"{self.task_url}/cancel",
49
55
  headers=self._client._headers()
@@ -54,6 +60,8 @@ class TaskResponse(TaskBase):
54
60
  def delete(self):
55
61
  if not self.task_url:
56
62
  raise ValueError("Task URL not found")
63
+ if not self._client._session:
64
+ raise ValueError("Client session not found")
57
65
  r = self._client._session.delete(
58
66
  self.task_url,
59
67
  headers=self._client._headers()
@@ -6,6 +6,8 @@ import asyncio
6
6
  class TaskResponseAsync(TaskBase):
7
7
  async def _poll_request(self) -> dict:
8
8
  try:
9
+ if not self._client._client:
10
+ raise ValueError("Client not found")
9
11
  r = await self._client._client.get(self.task_url, headers=self._client._headers())
10
12
  r.raise_for_status()
11
13
  return r.json()
@@ -18,6 +20,8 @@ class TaskResponseAsync(TaskBase):
18
20
  async def poll(self) -> 'TaskResponseAsync':
19
21
  if not self.task_url:
20
22
  raise ValueError("Task URL not found")
23
+ if not self._client._client:
24
+ raise ValueError("Client not found")
21
25
  while True:
22
26
  j = await self._poll_request()
23
27
  updated = TaskResponseAsync(**j).with_client(self._client)
@@ -29,6 +33,8 @@ class TaskResponseAsync(TaskBase):
29
33
  async def update(self, config: Configuration) -> 'TaskResponseAsync':
30
34
  if not self.task_url:
31
35
  raise ValueError("Task URL not found")
36
+ if not self._client._client:
37
+ raise ValueError("Client not found")
32
38
  f = prepare_upload_data(None, config)
33
39
  r = await self._client._client.patch(self.task_url, files=f, headers=self._client._headers())
34
40
  r.raise_for_status()
@@ -39,6 +45,8 @@ class TaskResponseAsync(TaskBase):
39
45
  async def cancel(self):
40
46
  if not self.task_url:
41
47
  raise ValueError("Task URL not found")
48
+ if not self._client._client:
49
+ raise ValueError("Client not found")
42
50
  r = await self._client._client.get(f"{self.task_url}/cancel", headers=self._client._headers())
43
51
  r.raise_for_status()
44
52
  return await self.poll()
@@ -46,5 +54,7 @@ class TaskResponseAsync(TaskBase):
46
54
  async def delete(self):
47
55
  if not self.task_url:
48
56
  raise ValueError("Task URL not found")
57
+ if not self._client._client:
58
+ raise ValueError("Client not found")
49
59
  r = await self._client._client.delete(self.task_url, headers=self._client._headers())
50
- r.raise_for_status()
60
+ r.raise_for_status()
@@ -1,8 +1,7 @@
1
- from .config import Configuration
1
+ from .config import Configuration, Status, OutputResponse
2
2
  from .protocol import ChunkrClientProtocol
3
- from ..models import Status, OutputResponse
4
3
  from abc import ABC, abstractmethod
5
- from typing import TypeVar, Optional, Generic, Union
4
+ from typing import TypeVar, Optional, Generic
6
5
  from pydantic import BaseModel, PrivateAttr
7
6
  from datetime import datetime
8
7
 
@@ -23,7 +22,7 @@ class TaskBase(BaseModel, ABC, Generic[T]):
23
22
  status: Status
24
23
  task_id: str
25
24
  task_url: Optional[str]
26
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
25
+ _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
27
26
 
28
27
  @abstractmethod
29
28
  def _poll_request(self) -> dict:
@@ -50,7 +49,7 @@ class TaskBase(BaseModel, ABC, Generic[T]):
50
49
  """Delete the task."""
51
50
  pass
52
51
 
53
- def with_client(self, client: Union[ChunkrClientProtocol]) -> T:
52
+ def with_client(self, client: ChunkrClientProtocol) -> T:
54
53
  self._client = client
55
54
  return self
56
55
 
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.12
3
+ Version: 0.0.14
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
- Requires-Dist: httpx>=0.25.0
11
10
  Requires-Dist: pillow>=10.0.0
12
11
  Requires-Dist: pydantic>=2.0.0
13
12
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -81,7 +80,7 @@ async def process_document():
81
80
  # If you want to upload without waiting for processing
82
81
  task = await chunkr.create_task("document.pdf")
83
82
  # ... do other things ...
84
- await task.poll_async() # Check status when needed
83
+ await task.poll() # Check status when needed
85
84
  ```
86
85
 
87
86
  ### Additional Features
@@ -9,7 +9,9 @@ src/chunkr_ai.egg-info/dependency_links.txt
9
9
  src/chunkr_ai.egg-info/requires.txt
10
10
  src/chunkr_ai.egg-info/top_level.txt
11
11
  src/chunkr_ai/api/__init__.py
12
+ src/chunkr_ai/api/api.py
12
13
  src/chunkr_ai/api/auth.py
14
+ src/chunkr_ai/api/base.py
13
15
  src/chunkr_ai/api/chunkr.py
14
16
  src/chunkr_ai/api/chunkr_async.py
15
17
  src/chunkr_ai/api/chunkr_base.py
@@ -1,5 +1,4 @@
1
1
  httpx>=0.25.0
2
- httpx>=0.25.0
3
2
  pillow>=10.0.0
4
3
  pydantic>=2.0.0
5
4
  pytest-asyncio>=0.21.0
@@ -38,7 +38,7 @@ async def test_send_file_path(chunkr_client, sample_path):
38
38
  client_type, client = chunkr_client
39
39
  response = await client.upload(sample_path) if client_type == "async" else client.upload(sample_path)
40
40
 
41
- assert isinstance(response, TaskResponse)
41
+
42
42
  assert response.task_id is not None
43
43
  assert response.status == "Succeeded"
44
44
  assert response.output is not None
@@ -48,7 +48,7 @@ async def test_send_file_path_str(chunkr_client, sample_path):
48
48
  client_type, client = chunkr_client
49
49
  response = await client.upload(str(sample_path)) if client_type == "async" else client.upload(str(sample_path))
50
50
 
51
- assert isinstance(response, TaskResponse)
51
+
52
52
  assert response.task_id is not None
53
53
  assert response.status == "Succeeded"
54
54
  assert response.output is not None
@@ -59,7 +59,7 @@ async def test_send_opened_file(chunkr_client, sample_path):
59
59
  with open(sample_path, 'rb') as f:
60
60
  response = await client.upload(f) if client_type == "async" else client.upload(f)
61
61
 
62
- assert isinstance(response, TaskResponse)
62
+
63
63
  assert response.task_id is not None
64
64
  assert response.status == "Succeeded"
65
65
  assert response.output is not None
@@ -69,7 +69,6 @@ async def test_send_pil_image(chunkr_client, sample_image):
69
69
  client_type, client = chunkr_client
70
70
  response = await client.upload(sample_image) if client_type == "async" else client.upload(sample_image)
71
71
 
72
- assert isinstance(response, TaskResponse)
73
72
  assert response.task_id is not None
74
73
  assert response.status == "Succeeded"
75
74
 
@@ -82,7 +81,6 @@ async def test_ocr_auto(chunkr_client, sample_path):
82
81
  ocr_strategy=OcrStrategy.AUTO
83
82
  ))
84
83
 
85
- assert isinstance(response, TaskResponse)
86
84
  assert response.task_id is not None
87
85
  assert response.status == "Succeeded"
88
86
  assert response.output is not None
@@ -96,7 +94,7 @@ async def test_expires_in(chunkr_client, sample_path):
96
94
  expires_in=10
97
95
  ))
98
96
 
99
- assert isinstance(response, TaskResponse)
97
+
100
98
  assert response.task_id is not None
101
99
  assert response.status == "Succeeded"
102
100
  assert response.output is not None
@@ -114,7 +112,7 @@ async def test_chunk_processing(chunkr_client, sample_path):
114
112
  )
115
113
  ))
116
114
 
117
- assert isinstance(response, TaskResponse)
115
+
118
116
  assert response.task_id is not None
119
117
  assert response.status == "Succeeded"
120
118
  assert response.output is not None
@@ -128,7 +126,6 @@ async def test_segmentation_strategy_page(chunkr_client, sample_path):
128
126
  segmentation_strategy=SegmentationStrategy.PAGE
129
127
  ))
130
128
 
131
- assert isinstance(response, TaskResponse)
132
129
  assert response.task_id is not None
133
130
  assert response.status == "Succeeded"
134
131
  assert response.output is not None
@@ -152,7 +149,7 @@ async def test_page_llm_html(chunkr_client, sample_path):
152
149
  )
153
150
  ))
154
151
 
155
- assert isinstance(response, TaskResponse)
152
+
156
153
  assert response.task_id is not None
157
154
  assert response.status == "Succeeded"
158
155
  assert response.output is not None
@@ -160,15 +157,7 @@ async def test_page_llm_html(chunkr_client, sample_path):
160
157
  @pytest.mark.asyncio
161
158
  async def test_page_llm(chunkr_client, sample_path):
162
159
  client_type, client = chunkr_client
163
- response = await client.upload(sample_path, Configuration(
164
- segmentation_strategy=SegmentationStrategy.PAGE,
165
- segment_processing=SegmentProcessing(
166
- page=GenerationConfig(
167
- html=GenerationStrategy.LLM,
168
- markdown=GenerationStrategy.LLM
169
- )
170
- )
171
- )) if client_type == "async" else client.upload(sample_path, Configuration(
160
+ configuration = Configuration(
172
161
  segmentation_strategy=SegmentationStrategy.PAGE,
173
162
  segment_processing=SegmentProcessing(
174
163
  page=GenerationConfig(
@@ -176,9 +165,10 @@ async def test_page_llm(chunkr_client, sample_path):
176
165
  markdown=GenerationStrategy.LLM
177
166
  )
178
167
  )
179
- ))
168
+ )
169
+
170
+ response = await client.upload(sample_path, configuration) if client_type == "async" else client.upload(sample_path, configuration)
180
171
 
181
- assert isinstance(response, TaskResponse)
182
172
  assert response.task_id is not None
183
173
  assert response.status == "Succeeded"
184
174
  assert response.output is not None
@@ -204,16 +194,16 @@ async def test_json_schema(chunkr_client, sample_path):
204
194
  )
205
195
  ))
206
196
 
207
- assert isinstance(response, TaskResponse)
208
197
  assert response.task_id is not None
209
198
  if response.status != "Succeeded":
210
199
  raise ValueError(f"Task failed with message: {response.message}")
211
200
  assert response.output is not None
201
+
212
202
  @pytest.mark.asyncio
213
203
  async def test_delete_task(chunkr_client, sample_path):
214
204
  client_type, client = chunkr_client
215
205
  response = await client.upload(sample_path) if client_type == "async" else client.upload(sample_path)
216
- assert isinstance(response, TaskResponse)
206
+
217
207
  assert response.task_id is not None
218
208
  assert response.status == "Succeeded"
219
209
  assert response.output is not None
@@ -249,14 +239,14 @@ async def test_delete_task_direct(chunkr_client, sample_path):
249
239
  async def test_cancel_task(chunkr_client, sample_path):
250
240
  client_type, client = chunkr_client
251
241
  response = await client.create_task(sample_path) if client_type == "async" else client.create_task(sample_path)
252
- assert isinstance(response, TaskResponse)
242
+
253
243
  assert response.task_id is not None
254
244
  assert response.status == "Starting"
255
245
 
256
246
  if client_type == "async":
257
247
  await client.cancel_task(response.task_id)
258
248
  assert (await client.get_task(response.task_id)).status == "Cancelled"
259
- await response.poll_async()
249
+ await response.poll()
260
250
  else:
261
251
  client.cancel_task(response.task_id)
262
252
  assert client.get_task(response.task_id).status == "Cancelled"
@@ -290,7 +280,7 @@ async def test_update_task(chunkr_client, sample_path):
290
280
  segmentation_strategy=SegmentationStrategy.PAGE,
291
281
  )
292
282
  response = await client.upload(sample_path, original_config) if client_type == "async" else client.upload(sample_path, original_config)
293
- assert isinstance(response, TaskResponse)
283
+
294
284
  assert response.task_id is not None
295
285
  assert response.status == "Succeeded"
296
286
  assert response.output is not None
@@ -1,144 +0,0 @@
1
- from .chunkr_base import ChunkrBase
2
- from .task import TaskResponse
3
- from .config import Configuration
4
- import httpx
5
- from pathlib import Path
6
- from PIL import Image
7
- from typing import Union, BinaryIO
8
- from .misc import prepare_upload_data
9
-
10
- class ChunkrAsync(ChunkrBase):
11
- """Asynchronous Chunkr API client"""
12
-
13
- def __init__(self, url: str = None, api_key: str = None):
14
- super().__init__(url, api_key)
15
- self._client = httpx.AsyncClient()
16
-
17
- async def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
18
- """Upload a file and wait for processing to complete.
19
-
20
- Args:
21
- file: The file to upload.
22
- config: Configuration options for processing. Optional.
23
-
24
- Examples:
25
- ```python
26
- # Upload from file path
27
- await chunkr.upload("document.pdf")
28
-
29
- # Upload from opened file
30
- with open("document.pdf", "rb") as f:
31
- await chunkr.upload(f)
32
-
33
- # Upload from URL
34
- await chunkr.upload("https://example.com/document.pdf")
35
-
36
- # Upload from base64 string (must include MIME type header)
37
- await chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
38
-
39
- # Upload an image
40
- from PIL import Image
41
- img = Image.open("photo.jpg")
42
- await chunkr.upload(img)
43
- ```
44
- Returns:
45
- TaskResponse: The completed task response
46
- """
47
- task = await self.create_task(file, config)
48
- return await task.poll_async()
49
-
50
- async def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
- """Update a task by its ID and wait for processing to complete.
52
-
53
- Args:
54
- task_id: The ID of the task to update
55
- config: Configuration options for processing. Optional.
56
-
57
- Returns:
58
- TaskResponse: The updated task response
59
- """
60
- task = await self.update_task(task_id, config)
61
- return await task.poll_async()
62
-
63
- async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
64
- """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.
65
-
66
- Args:
67
- file: The file to upload.
68
- config: Configuration options for processing. Optional.
69
-
70
- Examples:
71
- ```
72
- # Upload from file path
73
- task = await chunkr.start_upload("document.pdf")
74
-
75
- # Upload from opened file
76
- with open("document.pdf", "rb") as f:
77
- task = await chunkr.start_upload(f)
78
-
79
- # Upload from URL
80
- task = await chunkr.start_upload("https://example.com/document.pdf")
81
-
82
- # Upload from base64 string (must include MIME type header)
83
- task = await chunkr.start_upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
84
-
85
- # Upload an image
86
- from PIL import Image
87
- img = Image.open("photo.jpg")
88
- task = await chunkr.start_upload(img)
89
-
90
- # Wait for the task to complete - this can be done when needed
91
- await task.poll_async()
92
- ```
93
-
94
- Returns:
95
- TaskResponse: The initial task response
96
- """
97
- files = prepare_upload_data(file, config)
98
- r = await self._client.post(
99
- f"{self.url}/api/v1/task",
100
- files=files,
101
- headers=self._headers()
102
- )
103
- r.raise_for_status()
104
- return TaskResponse(**r.json()).with_client(self)
105
-
106
- async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
- files = prepare_upload_data(None, config)
108
- r = await self._client.patch(
109
- f"{self.url}/api/v1/task/{task_id}",
110
- files=files,
111
- headers=self._headers()
112
- )
113
-
114
- r.raise_for_status()
115
- return TaskResponse(**r.json()).with_client(self)
116
-
117
- async def get_task(self, task_id: str) -> TaskResponse:
118
- r = await self._client.get(
119
- f"{self.url}/api/v1/task/{task_id}",
120
- headers=self._headers()
121
- )
122
- r.raise_for_status()
123
- return TaskResponse(**r.json()).with_client(self)
124
-
125
- async def delete_task(self, task_id: str) -> None:
126
- r = await self._client.delete(
127
- f"{self.url}/api/v1/task/{task_id}",
128
- headers=self._headers()
129
- )
130
- r.raise_for_status()
131
-
132
- async def cancel_task(self, task_id: str) -> None:
133
- r = await self._client.get(
134
- f"{self.url}/api/v1/task/{task_id}/cancel",
135
- headers=self._headers()
136
- )
137
- r.raise_for_status()
138
-
139
-
140
- async def __aenter__(self):
141
- return self
142
-
143
- async def __aexit__(self, exc_type, exc_val, exc_tb):
144
- await self._client.aclose()
@@ -1,85 +0,0 @@
1
- from .config import Configuration
2
- from .task import TaskResponse
3
- from .auth import HeadersMixin
4
- from abc import abstractmethod
5
- from dotenv import load_dotenv
6
- import os
7
- from pathlib import Path
8
- from PIL import Image
9
- from typing import BinaryIO, Union
10
-
11
- class ChunkrBase(HeadersMixin):
12
- """Base class with shared functionality for Chunkr API clients."""
13
-
14
- def __init__(self, url: str = None, api_key: str = None):
15
- load_dotenv()
16
- self.url = (
17
- url or
18
- os.getenv('CHUNKR_URL') or
19
- 'https://api.chunkr.ai'
20
- )
21
- self._api_key = (
22
- api_key or
23
- os.getenv('CHUNKR_API_KEY')
24
- )
25
- if not self._api_key:
26
- raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")
27
-
28
- self.url = self.url.rstrip("/")
29
-
30
- @abstractmethod
31
- def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
32
- """Upload a file and wait for processing to complete.
33
-
34
- Must be implemented by subclasses.
35
- """
36
- pass
37
-
38
- @abstractmethod
39
- def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
40
- """Update a task by its ID.
41
-
42
- Must be implemented by subclasses.
43
- """
44
- pass
45
-
46
- @abstractmethod
47
- def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
48
- """Upload a file for processing and immediately return the task response.
49
-
50
- Must be implemented by subclasses.
51
- """
52
- pass
53
-
54
- @abstractmethod
55
- def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
56
- """Update a task by its ID.
57
-
58
- Must be implemented by subclasses.
59
- """
60
- pass
61
-
62
- @abstractmethod
63
- def get_task(self, task_id: str) -> TaskResponse:
64
- """Get a task response by its ID.
65
-
66
- Must be implemented by subclasses.
67
- """
68
- pass
69
-
70
- @abstractmethod
71
- def delete_task(self, task_id: str) -> None:
72
- """Delete a task by its ID.
73
-
74
- Must be implemented by subclasses.
75
- """
76
- pass
77
-
78
- @abstractmethod
79
- def cancel_task(self, task_id: str) -> None:
80
- """Cancel a task by its ID.
81
-
82
- Must be implemented by subclasses.
83
- """
84
- pass
85
-
File without changes
File without changes