chunkr-ai 0.0.14__tar.gz → 0.0.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.0.14/src/chunkr_ai.egg-info → chunkr_ai-0.0.16}/PKG-INFO +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/pyproject.toml +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/__init__.py +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/auth.py +4 -4
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/base.py +58 -48
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr.py +21 -20
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_async.py +26 -20
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_base.py +34 -27
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/config.py +41 -14
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/misc.py +52 -44
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/protocol.py +5 -3
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/schema.py +66 -58
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task.py +13 -16
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_async.py +16 -7
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_base.py +4 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/models.py +23 -22
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/tests/test_chunkr.py +239 -122
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/LICENSE +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/README.md +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/setup.cfg +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/api.py +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.14"
|
7
|
+
version = "0.0.16"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -1,12 +1,12 @@
|
|
1
1
|
class HeadersMixin:
|
2
2
|
"""Mixin class for handling authorization headers"""
|
3
|
-
|
3
|
+
|
4
4
|
def get_api_key(self) -> str:
|
5
5
|
"""Get the API key"""
|
6
|
-
if not hasattr(self, '_api_key') or not self._api_key:
|
6
|
+
if not hasattr(self, "_api_key") or not self._api_key:
|
7
7
|
raise ValueError("API key not set")
|
8
8
|
return self._api_key
|
9
|
-
|
9
|
+
|
10
10
|
def _headers(self) -> dict:
|
11
11
|
"""Generate authorization headers"""
|
12
|
-
return {"Authorization": self.get_api_key()}
|
12
|
+
return {"Authorization": self.get_api_key()}
|
@@ -11,28 +11,23 @@ from PIL import Image
|
|
11
11
|
import requests
|
12
12
|
from typing import BinaryIO, Tuple, Union
|
13
13
|
|
14
|
+
|
14
15
|
class ChunkrBase(HeadersMixin):
|
15
16
|
"""Base class with shared functionality for Chunkr API clients."""
|
16
17
|
|
17
18
|
def __init__(self, url: str = None, api_key: str = None):
|
18
19
|
load_dotenv()
|
19
|
-
self.url = (
|
20
|
-
|
21
|
-
os.getenv('CHUNKR_URL') or
|
22
|
-
'https://api.chunkr.ai'
|
23
|
-
)
|
24
|
-
self._api_key = (
|
25
|
-
api_key or
|
26
|
-
os.getenv('CHUNKR_API_KEY')
|
27
|
-
)
|
20
|
+
self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
|
21
|
+
self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
|
28
22
|
if not self._api_key:
|
29
|
-
raise ValueError(
|
30
|
-
|
23
|
+
raise ValueError(
|
24
|
+
"API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai"
|
25
|
+
)
|
26
|
+
|
31
27
|
self.url = self.url.rstrip("/")
|
32
28
|
|
33
29
|
def _prepare_file(
|
34
|
-
self,
|
35
|
-
file: Union[str, Path, BinaryIO, Image.Image]
|
30
|
+
self, file: Union[str, Path, BinaryIO, Image.Image]
|
36
31
|
) -> Tuple[str, BinaryIO]:
|
37
32
|
"""Convert various file types into a tuple of (filename, file-like object).
|
38
33
|
|
@@ -54,40 +49,43 @@ class ChunkrBase(HeadersMixin):
|
|
54
49
|
ValueError: If the MIME type is unsupported
|
55
50
|
"""
|
56
51
|
# Handle URLs
|
57
|
-
if isinstance(file, str) and (
|
52
|
+
if isinstance(file, str) and (
|
53
|
+
file.startswith("http://") or file.startswith("https://")
|
54
|
+
):
|
58
55
|
response = requests.get(file)
|
59
56
|
response.raise_for_status()
|
60
57
|
file_obj = io.BytesIO(response.content)
|
61
|
-
filename = Path(file.split('/')[-1]).name or 'downloaded_file'
|
58
|
+
filename = Path(file.split("/")[-1]).name or "downloaded_file"
|
62
59
|
return filename, file_obj
|
63
60
|
|
64
61
|
# Handle base64 strings
|
65
|
-
if isinstance(file, str) and ',' in file and ';base64,' in file:
|
62
|
+
if isinstance(file, str) and "," in file and ";base64," in file:
|
66
63
|
try:
|
67
64
|
# Split header and data
|
68
|
-
header, base64_data = file.split(',', 1)
|
65
|
+
header, base64_data = file.split(",", 1)
|
69
66
|
import base64
|
67
|
+
|
70
68
|
file_bytes = base64.b64decode(base64_data)
|
71
69
|
file_obj = io.BytesIO(file_bytes)
|
72
|
-
|
70
|
+
|
73
71
|
# Try to determine format from header
|
74
|
-
format = 'bin'
|
75
|
-
mime_type = header.split(':')[-1].split(';')[0].lower()
|
76
|
-
|
72
|
+
format = "bin"
|
73
|
+
mime_type = header.split(":")[-1].split(";")[0].lower()
|
74
|
+
|
77
75
|
# Map MIME types to file extensions
|
78
76
|
mime_to_ext = {
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
77
|
+
"application/pdf": "pdf",
|
78
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
79
|
+
"application/msword": "doc",
|
80
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
|
81
|
+
"application/vnd.ms-powerpoint": "ppt",
|
82
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
83
|
+
"application/vnd.ms-excel": "xls",
|
84
|
+
"image/jpeg": "jpg",
|
85
|
+
"image/png": "png",
|
86
|
+
"image/jpg": "jpg",
|
89
87
|
}
|
90
|
-
|
88
|
+
|
91
89
|
if mime_type in mime_to_ext:
|
92
90
|
format = mime_to_ext[mime_type]
|
93
91
|
else:
|
@@ -102,20 +100,24 @@ class ChunkrBase(HeadersMixin):
|
|
102
100
|
path = Path(file).resolve()
|
103
101
|
if not path.exists():
|
104
102
|
raise FileNotFoundError(f"File not found: {file}")
|
105
|
-
return path.name, open(path, 'rb')
|
103
|
+
return path.name, open(path, "rb")
|
106
104
|
|
107
105
|
# Handle PIL Images
|
108
106
|
if isinstance(file, Image.Image):
|
109
107
|
img_byte_arr = io.BytesIO()
|
110
|
-
format = file.format or 'PNG'
|
108
|
+
format = file.format or "PNG"
|
111
109
|
file.save(img_byte_arr, format=format)
|
112
110
|
img_byte_arr.seek(0)
|
113
111
|
return f"image.{format.lower()}", img_byte_arr
|
114
112
|
|
115
113
|
# Handle file-like objects
|
116
|
-
if hasattr(file, 'read') and hasattr(file, 'seek'):
|
114
|
+
if hasattr(file, "read") and hasattr(file, "seek"):
|
117
115
|
# Try to get the filename from the file object if possible
|
118
|
-
name =
|
116
|
+
name = (
|
117
|
+
getattr(file, "name", "document")
|
118
|
+
if hasattr(file, "name")
|
119
|
+
else "document"
|
120
|
+
)
|
119
121
|
return Path(name).name, file
|
120
122
|
|
121
123
|
raise TypeError(f"Unsupported file type: {type(file)}")
|
@@ -123,43 +125,51 @@ class ChunkrBase(HeadersMixin):
|
|
123
125
|
def _prepare_upload_data(
|
124
126
|
self,
|
125
127
|
file: Union[str, Path, BinaryIO, Image.Image],
|
126
|
-
config: Configuration = None
|
128
|
+
config: Configuration = None,
|
127
129
|
) -> Tuple[dict, dict]:
|
128
130
|
"""Prepare files and data dictionaries for upload.
|
129
|
-
|
131
|
+
|
130
132
|
Args:
|
131
133
|
file: The file to upload
|
132
134
|
config: Optional configuration settings
|
133
|
-
|
135
|
+
|
134
136
|
Returns:
|
135
137
|
Tuple[dict, dict]: (files dict, data dict) ready for upload
|
136
138
|
"""
|
137
139
|
filename, file_obj = self._prepare_file(file)
|
138
140
|
files = {"file": (filename, file_obj)}
|
139
141
|
data = {}
|
140
|
-
|
142
|
+
|
141
143
|
if config:
|
142
144
|
config_dict = config.model_dump(mode="json", exclude_none=True)
|
143
145
|
for key, value in config_dict.items():
|
144
146
|
if isinstance(value, dict):
|
145
|
-
files[key] = (None, json.dumps(value), 'application/json')
|
147
|
+
files[key] = (None, json.dumps(value), "application/json")
|
146
148
|
else:
|
147
149
|
data[key] = value
|
148
|
-
|
150
|
+
|
149
151
|
return files, data
|
150
|
-
|
152
|
+
|
151
153
|
@abstractmethod
|
152
|
-
def upload(
|
154
|
+
def upload(
|
155
|
+
self,
|
156
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
157
|
+
config: Configuration = None,
|
158
|
+
) -> TaskResponse:
|
153
159
|
"""Upload a file and wait for processing to complete.
|
154
|
-
|
160
|
+
|
155
161
|
Must be implemented by subclasses.
|
156
162
|
"""
|
157
163
|
pass
|
158
164
|
|
159
165
|
@abstractmethod
|
160
|
-
def start_upload(
|
166
|
+
def start_upload(
|
167
|
+
self,
|
168
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
169
|
+
config: Configuration = None,
|
170
|
+
) -> TaskResponse:
|
161
171
|
"""Upload a file for processing and immediately return the task response.
|
162
|
-
|
172
|
+
|
163
173
|
Must be implemented by subclasses.
|
164
174
|
"""
|
165
175
|
pass
|
@@ -167,7 +177,7 @@ class ChunkrBase(HeadersMixin):
|
|
167
177
|
@abstractmethod
|
168
178
|
def get_task(self, task_id: str) -> TaskResponse:
|
169
179
|
"""Get a task response by its ID.
|
170
|
-
|
180
|
+
|
171
181
|
Must be implemented by subclasses.
|
172
182
|
"""
|
173
183
|
pass
|
@@ -7,6 +7,7 @@ import requests
|
|
7
7
|
from typing import Union, BinaryIO
|
8
8
|
from .misc import prepare_upload_data
|
9
9
|
|
10
|
+
|
10
11
|
class Chunkr(ChunkrBase):
|
11
12
|
"""Chunkr API client"""
|
12
13
|
|
@@ -14,56 +15,57 @@ class Chunkr(ChunkrBase):
|
|
14
15
|
super().__init__(url, api_key)
|
15
16
|
self._session = requests.Session()
|
16
17
|
|
17
|
-
def upload(
|
18
|
+
def upload(
|
19
|
+
self,
|
20
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
21
|
+
config: Configuration = None,
|
22
|
+
) -> TaskResponse:
|
18
23
|
task = self.create_task(file, config)
|
19
24
|
return task.poll()
|
20
|
-
|
25
|
+
|
21
26
|
def update(self, task_id: str, config: Configuration) -> TaskResponse:
|
22
27
|
task = self.update_task(task_id, config)
|
23
28
|
return task.poll()
|
24
29
|
|
25
|
-
def create_task(
|
26
|
-
|
30
|
+
def create_task(
|
31
|
+
self,
|
32
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
33
|
+
config: Configuration = None,
|
34
|
+
) -> TaskResponse:
|
35
|
+
files = prepare_upload_data(file, config)
|
27
36
|
if not self._session:
|
28
37
|
raise ValueError("Session not found")
|
29
38
|
r = self._session.post(
|
30
|
-
f"{self.url}/api/v1/task",
|
31
|
-
files=files,
|
32
|
-
headers=self._headers()
|
39
|
+
f"{self.url}/api/v1/task", files=files, headers=self._headers()
|
33
40
|
)
|
34
41
|
r.raise_for_status()
|
35
42
|
return TaskResponse(**r.json()).with_client(self)
|
36
|
-
|
43
|
+
|
37
44
|
def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
|
38
45
|
files = prepare_upload_data(None, config)
|
39
46
|
if not self._session:
|
40
47
|
raise ValueError("Session not found")
|
41
48
|
r = self._session.patch(
|
42
|
-
f"{self.url}/api/v1/task/{task_id}",
|
43
|
-
files=files,
|
44
|
-
headers=self._headers()
|
49
|
+
f"{self.url}/api/v1/task/{task_id}", files=files, headers=self._headers()
|
45
50
|
)
|
46
|
-
|
51
|
+
|
47
52
|
r.raise_for_status()
|
48
53
|
return TaskResponse(**r.json()).with_client(self)
|
49
|
-
|
54
|
+
|
50
55
|
def get_task(self, task_id: str) -> TaskResponse:
|
51
56
|
if not self._session:
|
52
57
|
raise ValueError("Session not found")
|
53
58
|
r = self._session.get(
|
54
|
-
f"{self.url}/api/v1/task/{task_id}",
|
55
|
-
headers=self._headers()
|
59
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
56
60
|
)
|
57
61
|
r.raise_for_status()
|
58
62
|
return TaskResponse(**r.json()).with_client(self)
|
59
63
|
|
60
|
-
|
61
64
|
def delete_task(self, task_id: str) -> None:
|
62
65
|
if not self._session:
|
63
66
|
raise ValueError("Session not found")
|
64
67
|
r = self._session.delete(
|
65
|
-
f"{self.url}/api/v1/task/{task_id}",
|
66
|
-
headers=self._headers()
|
68
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
67
69
|
)
|
68
70
|
r.raise_for_status()
|
69
71
|
|
@@ -71,7 +73,6 @@ class Chunkr(ChunkrBase):
|
|
71
73
|
if not self._session:
|
72
74
|
raise ValueError("Session not found")
|
73
75
|
r = self._session.get(
|
74
|
-
f"{self.url}/api/v1/task/{task_id}/cancel",
|
75
|
-
headers=self._headers()
|
76
|
+
f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
|
76
77
|
)
|
77
78
|
r.raise_for_status()
|
@@ -7,14 +7,19 @@ from pathlib import Path
|
|
7
7
|
from PIL import Image
|
8
8
|
from typing import Union, BinaryIO
|
9
9
|
|
10
|
+
|
10
11
|
class ChunkrAsync(ChunkrBase):
|
11
12
|
"""Asynchronous Chunkr API client"""
|
12
|
-
|
13
|
+
|
13
14
|
def __init__(self, url: str = None, api_key: str = None):
|
14
15
|
super().__init__(url, api_key)
|
15
16
|
self._client = httpx.AsyncClient()
|
16
17
|
|
17
|
-
async def upload(
|
18
|
+
async def upload(
|
19
|
+
self,
|
20
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
21
|
+
config: Configuration = None,
|
22
|
+
) -> TaskResponseAsync:
|
18
23
|
if not self._client or self._client.is_closed:
|
19
24
|
self._client = httpx.AsyncClient()
|
20
25
|
try:
|
@@ -23,7 +28,7 @@ class ChunkrAsync(ChunkrBase):
|
|
23
28
|
except Exception as e:
|
24
29
|
await self._client.aclose()
|
25
30
|
raise e
|
26
|
-
|
31
|
+
|
27
32
|
async def update(self, task_id: str, config: Configuration) -> TaskResponseAsync:
|
28
33
|
if not self._client or self._client.is_closed:
|
29
34
|
self._client = httpx.AsyncClient()
|
@@ -34,15 +39,17 @@ class ChunkrAsync(ChunkrBase):
|
|
34
39
|
await self._client.aclose()
|
35
40
|
raise e
|
36
41
|
|
37
|
-
async def create_task(
|
42
|
+
async def create_task(
|
43
|
+
self,
|
44
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
45
|
+
config: Configuration = None,
|
46
|
+
) -> TaskResponseAsync:
|
38
47
|
if not self._client or self._client.is_closed:
|
39
48
|
self._client = httpx.AsyncClient()
|
40
49
|
try:
|
41
50
|
files = prepare_upload_data(file, config)
|
42
51
|
r = await self._client.post(
|
43
|
-
f"{self.url}/api/v1/task",
|
44
|
-
files=files,
|
45
|
-
headers=self._headers()
|
52
|
+
f"{self.url}/api/v1/task", files=files, headers=self._headers()
|
46
53
|
)
|
47
54
|
r.raise_for_status()
|
48
55
|
return TaskResponseAsync(**r.json()).with_client(self)
|
@@ -50,7 +57,9 @@ class ChunkrAsync(ChunkrBase):
|
|
50
57
|
await self._client.aclose()
|
51
58
|
raise e
|
52
59
|
|
53
|
-
async def update_task(
|
60
|
+
async def update_task(
|
61
|
+
self, task_id: str, config: Configuration
|
62
|
+
) -> TaskResponseAsync:
|
54
63
|
if not self._client or self._client.is_closed:
|
55
64
|
self._client = httpx.AsyncClient()
|
56
65
|
try:
|
@@ -58,49 +67,46 @@ class ChunkrAsync(ChunkrBase):
|
|
58
67
|
r = await self._client.patch(
|
59
68
|
f"{self.url}/api/v1/task/{task_id}",
|
60
69
|
files=files,
|
61
|
-
headers=self._headers()
|
70
|
+
headers=self._headers(),
|
62
71
|
)
|
63
|
-
|
72
|
+
|
64
73
|
r.raise_for_status()
|
65
74
|
return TaskResponseAsync(**r.json()).with_client(self)
|
66
75
|
except Exception as e:
|
67
76
|
await self._client.aclose()
|
68
77
|
raise e
|
69
|
-
|
78
|
+
|
70
79
|
async def get_task(self, task_id: str) -> TaskResponseAsync:
|
71
80
|
if not self._client or self._client.is_closed:
|
72
81
|
self._client = httpx.AsyncClient()
|
73
82
|
try:
|
74
83
|
r = await self._client.get(
|
75
|
-
f"{self.url}/api/v1/task/{task_id}",
|
76
|
-
headers=self._headers()
|
84
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
77
85
|
)
|
78
86
|
r.raise_for_status()
|
79
87
|
return TaskResponseAsync(**r.json()).with_client(self)
|
80
88
|
except Exception as e:
|
81
89
|
await self._client.aclose()
|
82
90
|
raise e
|
83
|
-
|
91
|
+
|
84
92
|
async def delete_task(self, task_id: str) -> None:
|
85
93
|
if not self._client or self._client.is_closed:
|
86
94
|
self._client = httpx.AsyncClient()
|
87
95
|
try:
|
88
96
|
r = await self._client.delete(
|
89
|
-
f"{self.url}/api/v1/task/{task_id}",
|
90
|
-
headers=self._headers()
|
97
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
91
98
|
)
|
92
99
|
r.raise_for_status()
|
93
100
|
except Exception as e:
|
94
101
|
await self._client.aclose()
|
95
102
|
raise e
|
96
|
-
|
103
|
+
|
97
104
|
async def cancel_task(self, task_id: str) -> None:
|
98
105
|
if not self._client or self._client.is_closed:
|
99
106
|
self._client = httpx.AsyncClient()
|
100
107
|
try:
|
101
108
|
r = await self._client.get(
|
102
|
-
f"{self.url}/api/v1/task/{task_id}/cancel",
|
103
|
-
headers=self._headers()
|
109
|
+
f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
|
104
110
|
)
|
105
111
|
r.raise_for_status()
|
106
112
|
except Exception as e:
|
@@ -111,4 +117,4 @@ class ChunkrAsync(ChunkrBase):
|
|
111
117
|
return self
|
112
118
|
|
113
119
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
114
|
-
await self._client.aclose()
|
120
|
+
await self._client.aclose()
|
@@ -9,31 +9,31 @@ from pathlib import Path
|
|
9
9
|
from PIL import Image
|
10
10
|
from typing import BinaryIO, Union
|
11
11
|
|
12
|
+
|
12
13
|
class ChunkrBase(HeadersMixin):
|
13
14
|
"""Base class with shared functionality for Chunkr API clients."""
|
14
15
|
|
15
16
|
def __init__(self, url: str = None, api_key: str = None):
|
16
17
|
load_dotenv()
|
17
|
-
self.url = (
|
18
|
-
|
19
|
-
os.getenv('CHUNKR_URL') or
|
20
|
-
'https://api.chunkr.ai'
|
21
|
-
)
|
22
|
-
self._api_key = (
|
23
|
-
api_key or
|
24
|
-
os.getenv('CHUNKR_API_KEY')
|
25
|
-
)
|
18
|
+
self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
|
19
|
+
self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
|
26
20
|
if not self._api_key:
|
27
|
-
raise ValueError(
|
28
|
-
|
21
|
+
raise ValueError(
|
22
|
+
"API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai"
|
23
|
+
)
|
24
|
+
|
29
25
|
self.url = self.url.rstrip("/")
|
30
26
|
|
31
27
|
@abstractmethod
|
32
|
-
def upload(
|
28
|
+
def upload(
|
29
|
+
self,
|
30
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
31
|
+
config: Configuration = None,
|
32
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
33
33
|
"""Upload a file and wait for processing to complete.
|
34
34
|
|
35
35
|
Args:
|
36
|
-
file: The file to upload.
|
36
|
+
file: The file to upload.
|
37
37
|
config: Configuration options for processing. Optional.
|
38
38
|
|
39
39
|
Examples:
|
@@ -44,7 +44,7 @@ class ChunkrBase(HeadersMixin):
|
|
44
44
|
# Upload from opened file
|
45
45
|
with open("document.pdf", "rb") as f:
|
46
46
|
await chunkr.upload(f)
|
47
|
-
|
47
|
+
|
48
48
|
# Upload from URL
|
49
49
|
await chunkr.upload("https://example.com/document.pdf")
|
50
50
|
|
@@ -60,11 +60,13 @@ class ChunkrBase(HeadersMixin):
|
|
60
60
|
TaskResponse: The completed task response
|
61
61
|
"""
|
62
62
|
pass
|
63
|
-
|
63
|
+
|
64
64
|
@abstractmethod
|
65
|
-
def update(
|
65
|
+
def update(
|
66
|
+
self, task_id: str, config: Configuration
|
67
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
66
68
|
"""Update a task by its ID and wait for processing to complete.
|
67
|
-
|
69
|
+
|
68
70
|
Args:
|
69
71
|
task_id: The ID of the task to update
|
70
72
|
config: Configuration options for processing. Optional.
|
@@ -75,7 +77,11 @@ class ChunkrBase(HeadersMixin):
|
|
75
77
|
pass
|
76
78
|
|
77
79
|
@abstractmethod
|
78
|
-
def create_task(
|
80
|
+
def create_task(
|
81
|
+
self,
|
82
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
83
|
+
config: Configuration = None,
|
84
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
79
85
|
"""Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
|
80
86
|
|
81
87
|
Args:
|
@@ -90,7 +96,7 @@ class ChunkrBase(HeadersMixin):
|
|
90
96
|
# Upload from opened file
|
91
97
|
with open("document.pdf", "rb") as f:
|
92
98
|
task = await chunkr.create_task(f)
|
93
|
-
|
99
|
+
|
94
100
|
# Upload from URL
|
95
101
|
task = await chunkr.create_task("https://example.com/document.pdf")
|
96
102
|
|
@@ -109,9 +115,11 @@ class ChunkrBase(HeadersMixin):
|
|
109
115
|
pass
|
110
116
|
|
111
117
|
@abstractmethod
|
112
|
-
def update_task(
|
118
|
+
def update_task(
|
119
|
+
self, task_id: str, config: Configuration
|
120
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
113
121
|
"""Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
|
114
|
-
|
122
|
+
|
115
123
|
Args:
|
116
124
|
task_id: The ID of the task to update
|
117
125
|
config: Configuration options for processing. Optional.
|
@@ -120,11 +128,11 @@ class ChunkrBase(HeadersMixin):
|
|
120
128
|
TaskResponse: The updated task response
|
121
129
|
"""
|
122
130
|
pass
|
123
|
-
|
131
|
+
|
124
132
|
@abstractmethod
|
125
133
|
def get_task(self, task_id: str) -> Union[TaskResponse, TaskResponseAsync]:
|
126
134
|
"""Get a task response by its ID.
|
127
|
-
|
135
|
+
|
128
136
|
Args:
|
129
137
|
task_id: The ID of the task to get
|
130
138
|
|
@@ -136,18 +144,17 @@ class ChunkrBase(HeadersMixin):
|
|
136
144
|
@abstractmethod
|
137
145
|
def delete_task(self, task_id: str) -> None:
|
138
146
|
"""Delete a task by its ID.
|
139
|
-
|
147
|
+
|
140
148
|
Args:
|
141
149
|
task_id: The ID of the task to delete
|
142
150
|
"""
|
143
151
|
pass
|
144
|
-
|
152
|
+
|
145
153
|
@abstractmethod
|
146
154
|
def cancel_task(self, task_id: str) -> None:
|
147
155
|
"""Cancel a task by its ID.
|
148
|
-
|
156
|
+
|
149
157
|
Args:
|
150
158
|
task_id: The ID of the task to cancel
|
151
159
|
"""
|
152
160
|
pass
|
153
|
-
|