chunkr-ai 0.0.14__tar.gz → 0.0.16__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {chunkr_ai-0.0.14/src/chunkr_ai.egg-info → chunkr_ai-0.0.16}/PKG-INFO +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/pyproject.toml +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/__init__.py +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/auth.py +4 -4
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/base.py +58 -48
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr.py +21 -20
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_async.py +26 -20
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_base.py +34 -27
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/config.py +41 -14
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/misc.py +52 -44
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/protocol.py +5 -3
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/schema.py +66 -58
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task.py +13 -16
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_async.py +16 -7
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_base.py +4 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/models.py +23 -22
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/tests/test_chunkr.py +239 -122
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/LICENSE +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/README.md +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/setup.cfg +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai/api/api.py +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.14 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.14"
|
7
|
+
version = "0.0.16"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -1,12 +1,12 @@
|
|
1
1
|
class HeadersMixin:
|
2
2
|
"""Mixin class for handling authorization headers"""
|
3
|
-
|
3
|
+
|
4
4
|
def get_api_key(self) -> str:
|
5
5
|
"""Get the API key"""
|
6
|
-
if not hasattr(self, '_api_key') or not self._api_key:
|
6
|
+
if not hasattr(self, "_api_key") or not self._api_key:
|
7
7
|
raise ValueError("API key not set")
|
8
8
|
return self._api_key
|
9
|
-
|
9
|
+
|
10
10
|
def _headers(self) -> dict:
|
11
11
|
"""Generate authorization headers"""
|
12
|
-
return {"Authorization": self.get_api_key()}
|
12
|
+
return {"Authorization": self.get_api_key()}
|
@@ -11,28 +11,23 @@ from PIL import Image
|
|
11
11
|
import requests
|
12
12
|
from typing import BinaryIO, Tuple, Union
|
13
13
|
|
14
|
+
|
14
15
|
class ChunkrBase(HeadersMixin):
|
15
16
|
"""Base class with shared functionality for Chunkr API clients."""
|
16
17
|
|
17
18
|
def __init__(self, url: str = None, api_key: str = None):
|
18
19
|
load_dotenv()
|
19
|
-
self.url = (
|
20
|
-
|
21
|
-
os.getenv('CHUNKR_URL') or
|
22
|
-
'https://api.chunkr.ai'
|
23
|
-
)
|
24
|
-
self._api_key = (
|
25
|
-
api_key or
|
26
|
-
os.getenv('CHUNKR_API_KEY')
|
27
|
-
)
|
20
|
+
self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
|
21
|
+
self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
|
28
22
|
if not self._api_key:
|
29
|
-
raise ValueError(
|
30
|
-
|
23
|
+
raise ValueError(
|
24
|
+
"API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai"
|
25
|
+
)
|
26
|
+
|
31
27
|
self.url = self.url.rstrip("/")
|
32
28
|
|
33
29
|
def _prepare_file(
|
34
|
-
self,
|
35
|
-
file: Union[str, Path, BinaryIO, Image.Image]
|
30
|
+
self, file: Union[str, Path, BinaryIO, Image.Image]
|
36
31
|
) -> Tuple[str, BinaryIO]:
|
37
32
|
"""Convert various file types into a tuple of (filename, file-like object).
|
38
33
|
|
@@ -54,40 +49,43 @@ class ChunkrBase(HeadersMixin):
|
|
54
49
|
ValueError: If the MIME type is unsupported
|
55
50
|
"""
|
56
51
|
# Handle URLs
|
57
|
-
if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
|
52
|
+
if isinstance(file, str) and (
|
53
|
+
file.startswith("http://") or file.startswith("https://")
|
54
|
+
):
|
58
55
|
response = requests.get(file)
|
59
56
|
response.raise_for_status()
|
60
57
|
file_obj = io.BytesIO(response.content)
|
61
|
-
filename = Path(file.split('/')[-1]).name or 'downloaded_file'
|
58
|
+
filename = Path(file.split("/")[-1]).name or "downloaded_file"
|
62
59
|
return filename, file_obj
|
63
60
|
|
64
61
|
# Handle base64 strings
|
65
|
-
if isinstance(file, str) and ',' in file and ';base64,' in file:
|
62
|
+
if isinstance(file, str) and "," in file and ";base64," in file:
|
66
63
|
try:
|
67
64
|
# Split header and data
|
68
|
-
header, base64_data = file.split(',', 1)
|
65
|
+
header, base64_data = file.split(",", 1)
|
69
66
|
import base64
|
67
|
+
|
70
68
|
file_bytes = base64.b64decode(base64_data)
|
71
69
|
file_obj = io.BytesIO(file_bytes)
|
72
|
-
|
70
|
+
|
73
71
|
# Try to determine format from header
|
74
|
-
format = 'bin'
|
75
|
-
mime_type = header.split(':')[-1].split(';')[0].lower()
|
76
|
-
|
72
|
+
format = "bin"
|
73
|
+
mime_type = header.split(":")[-1].split(";")[0].lower()
|
74
|
+
|
77
75
|
# Map MIME types to file extensions
|
78
76
|
mime_to_ext = {
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
77
|
+
"application/pdf": "pdf",
|
78
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
79
|
+
"application/msword": "doc",
|
80
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
|
81
|
+
"application/vnd.ms-powerpoint": "ppt",
|
82
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
83
|
+
"application/vnd.ms-excel": "xls",
|
84
|
+
"image/jpeg": "jpg",
|
85
|
+
"image/png": "png",
|
86
|
+
"image/jpg": "jpg",
|
89
87
|
}
|
90
|
-
|
88
|
+
|
91
89
|
if mime_type in mime_to_ext:
|
92
90
|
format = mime_to_ext[mime_type]
|
93
91
|
else:
|
@@ -102,20 +100,24 @@ class ChunkrBase(HeadersMixin):
|
|
102
100
|
path = Path(file).resolve()
|
103
101
|
if not path.exists():
|
104
102
|
raise FileNotFoundError(f"File not found: {file}")
|
105
|
-
return path.name, open(path, 'rb')
|
103
|
+
return path.name, open(path, "rb")
|
106
104
|
|
107
105
|
# Handle PIL Images
|
108
106
|
if isinstance(file, Image.Image):
|
109
107
|
img_byte_arr = io.BytesIO()
|
110
|
-
format = file.format or 'PNG'
|
108
|
+
format = file.format or "PNG"
|
111
109
|
file.save(img_byte_arr, format=format)
|
112
110
|
img_byte_arr.seek(0)
|
113
111
|
return f"image.{format.lower()}", img_byte_arr
|
114
112
|
|
115
113
|
# Handle file-like objects
|
116
|
-
if hasattr(file, 'read') and hasattr(file, 'seek'):
|
114
|
+
if hasattr(file, "read") and hasattr(file, "seek"):
|
117
115
|
# Try to get the filename from the file object if possible
|
118
|
-
name = getattr(file, 'name', 'document') if hasattr(file, 'name') else 'document'
|
116
|
+
name = (
|
117
|
+
getattr(file, "name", "document")
|
118
|
+
if hasattr(file, "name")
|
119
|
+
else "document"
|
120
|
+
)
|
119
121
|
return Path(name).name, file
|
120
122
|
|
121
123
|
raise TypeError(f"Unsupported file type: {type(file)}")
|
@@ -123,43 +125,51 @@ class ChunkrBase(HeadersMixin):
|
|
123
125
|
def _prepare_upload_data(
|
124
126
|
self,
|
125
127
|
file: Union[str, Path, BinaryIO, Image.Image],
|
126
|
-
config: Configuration = None
|
128
|
+
config: Configuration = None,
|
127
129
|
) -> Tuple[dict, dict]:
|
128
130
|
"""Prepare files and data dictionaries for upload.
|
129
|
-
|
131
|
+
|
130
132
|
Args:
|
131
133
|
file: The file to upload
|
132
134
|
config: Optional configuration settings
|
133
|
-
|
135
|
+
|
134
136
|
Returns:
|
135
137
|
Tuple[dict, dict]: (files dict, data dict) ready for upload
|
136
138
|
"""
|
137
139
|
filename, file_obj = self._prepare_file(file)
|
138
140
|
files = {"file": (filename, file_obj)}
|
139
141
|
data = {}
|
140
|
-
|
142
|
+
|
141
143
|
if config:
|
142
144
|
config_dict = config.model_dump(mode="json", exclude_none=True)
|
143
145
|
for key, value in config_dict.items():
|
144
146
|
if isinstance(value, dict):
|
145
|
-
files[key] = (None, json.dumps(value), 'application/json')
|
147
|
+
files[key] = (None, json.dumps(value), "application/json")
|
146
148
|
else:
|
147
149
|
data[key] = value
|
148
|
-
|
150
|
+
|
149
151
|
return files, data
|
150
|
-
|
152
|
+
|
151
153
|
@abstractmethod
|
152
|
-
def upload(
|
154
|
+
def upload(
|
155
|
+
self,
|
156
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
157
|
+
config: Configuration = None,
|
158
|
+
) -> TaskResponse:
|
153
159
|
"""Upload a file and wait for processing to complete.
|
154
|
-
|
160
|
+
|
155
161
|
Must be implemented by subclasses.
|
156
162
|
"""
|
157
163
|
pass
|
158
164
|
|
159
165
|
@abstractmethod
|
160
|
-
def start_upload(
|
166
|
+
def start_upload(
|
167
|
+
self,
|
168
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
169
|
+
config: Configuration = None,
|
170
|
+
) -> TaskResponse:
|
161
171
|
"""Upload a file for processing and immediately return the task response.
|
162
|
-
|
172
|
+
|
163
173
|
Must be implemented by subclasses.
|
164
174
|
"""
|
165
175
|
pass
|
@@ -167,7 +177,7 @@ class ChunkrBase(HeadersMixin):
|
|
167
177
|
@abstractmethod
|
168
178
|
def get_task(self, task_id: str) -> TaskResponse:
|
169
179
|
"""Get a task response by its ID.
|
170
|
-
|
180
|
+
|
171
181
|
Must be implemented by subclasses.
|
172
182
|
"""
|
173
183
|
pass
|
@@ -7,6 +7,7 @@ import requests
|
|
7
7
|
from typing import Union, BinaryIO
|
8
8
|
from .misc import prepare_upload_data
|
9
9
|
|
10
|
+
|
10
11
|
class Chunkr(ChunkrBase):
|
11
12
|
"""Chunkr API client"""
|
12
13
|
|
@@ -14,56 +15,57 @@ class Chunkr(ChunkrBase):
|
|
14
15
|
super().__init__(url, api_key)
|
15
16
|
self._session = requests.Session()
|
16
17
|
|
17
|
-
def upload(
|
18
|
+
def upload(
|
19
|
+
self,
|
20
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
21
|
+
config: Configuration = None,
|
22
|
+
) -> TaskResponse:
|
18
23
|
task = self.create_task(file, config)
|
19
24
|
return task.poll()
|
20
|
-
|
25
|
+
|
21
26
|
def update(self, task_id: str, config: Configuration) -> TaskResponse:
|
22
27
|
task = self.update_task(task_id, config)
|
23
28
|
return task.poll()
|
24
29
|
|
25
|
-
def create_task(
|
26
|
-
|
30
|
+
def create_task(
|
31
|
+
self,
|
32
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
33
|
+
config: Configuration = None,
|
34
|
+
) -> TaskResponse:
|
35
|
+
files = prepare_upload_data(file, config)
|
27
36
|
if not self._session:
|
28
37
|
raise ValueError("Session not found")
|
29
38
|
r = self._session.post(
|
30
|
-
f"{self.url}/api/v1/task",
|
31
|
-
files=files,
|
32
|
-
headers=self._headers()
|
39
|
+
f"{self.url}/api/v1/task", files=files, headers=self._headers()
|
33
40
|
)
|
34
41
|
r.raise_for_status()
|
35
42
|
return TaskResponse(**r.json()).with_client(self)
|
36
|
-
|
43
|
+
|
37
44
|
def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
|
38
45
|
files = prepare_upload_data(None, config)
|
39
46
|
if not self._session:
|
40
47
|
raise ValueError("Session not found")
|
41
48
|
r = self._session.patch(
|
42
|
-
f"{self.url}/api/v1/task/{task_id}",
|
43
|
-
files=files,
|
44
|
-
headers=self._headers()
|
49
|
+
f"{self.url}/api/v1/task/{task_id}", files=files, headers=self._headers()
|
45
50
|
)
|
46
|
-
|
51
|
+
|
47
52
|
r.raise_for_status()
|
48
53
|
return TaskResponse(**r.json()).with_client(self)
|
49
|
-
|
54
|
+
|
50
55
|
def get_task(self, task_id: str) -> TaskResponse:
|
51
56
|
if not self._session:
|
52
57
|
raise ValueError("Session not found")
|
53
58
|
r = self._session.get(
|
54
|
-
f"{self.url}/api/v1/task/{task_id}",
|
55
|
-
headers=self._headers()
|
59
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
56
60
|
)
|
57
61
|
r.raise_for_status()
|
58
62
|
return TaskResponse(**r.json()).with_client(self)
|
59
63
|
|
60
|
-
|
61
64
|
def delete_task(self, task_id: str) -> None:
|
62
65
|
if not self._session:
|
63
66
|
raise ValueError("Session not found")
|
64
67
|
r = self._session.delete(
|
65
|
-
f"{self.url}/api/v1/task/{task_id}",
|
66
|
-
headers=self._headers()
|
68
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
67
69
|
)
|
68
70
|
r.raise_for_status()
|
69
71
|
|
@@ -71,7 +73,6 @@ class Chunkr(ChunkrBase):
|
|
71
73
|
if not self._session:
|
72
74
|
raise ValueError("Session not found")
|
73
75
|
r = self._session.get(
|
74
|
-
f"{self.url}/api/v1/task/{task_id}/cancel",
|
75
|
-
headers=self._headers()
|
76
|
+
f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
|
76
77
|
)
|
77
78
|
r.raise_for_status()
|
@@ -7,14 +7,19 @@ from pathlib import Path
|
|
7
7
|
from PIL import Image
|
8
8
|
from typing import Union, BinaryIO
|
9
9
|
|
10
|
+
|
10
11
|
class ChunkrAsync(ChunkrBase):
|
11
12
|
"""Asynchronous Chunkr API client"""
|
12
|
-
|
13
|
+
|
13
14
|
def __init__(self, url: str = None, api_key: str = None):
|
14
15
|
super().__init__(url, api_key)
|
15
16
|
self._client = httpx.AsyncClient()
|
16
17
|
|
17
|
-
async def upload(
|
18
|
+
async def upload(
|
19
|
+
self,
|
20
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
21
|
+
config: Configuration = None,
|
22
|
+
) -> TaskResponseAsync:
|
18
23
|
if not self._client or self._client.is_closed:
|
19
24
|
self._client = httpx.AsyncClient()
|
20
25
|
try:
|
@@ -23,7 +28,7 @@ class ChunkrAsync(ChunkrBase):
|
|
23
28
|
except Exception as e:
|
24
29
|
await self._client.aclose()
|
25
30
|
raise e
|
26
|
-
|
31
|
+
|
27
32
|
async def update(self, task_id: str, config: Configuration) -> TaskResponseAsync:
|
28
33
|
if not self._client or self._client.is_closed:
|
29
34
|
self._client = httpx.AsyncClient()
|
@@ -34,15 +39,17 @@ class ChunkrAsync(ChunkrBase):
|
|
34
39
|
await self._client.aclose()
|
35
40
|
raise e
|
36
41
|
|
37
|
-
async def create_task(
|
42
|
+
async def create_task(
|
43
|
+
self,
|
44
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
45
|
+
config: Configuration = None,
|
46
|
+
) -> TaskResponseAsync:
|
38
47
|
if not self._client or self._client.is_closed:
|
39
48
|
self._client = httpx.AsyncClient()
|
40
49
|
try:
|
41
50
|
files = prepare_upload_data(file, config)
|
42
51
|
r = await self._client.post(
|
43
|
-
f"{self.url}/api/v1/task",
|
44
|
-
files=files,
|
45
|
-
headers=self._headers()
|
52
|
+
f"{self.url}/api/v1/task", files=files, headers=self._headers()
|
46
53
|
)
|
47
54
|
r.raise_for_status()
|
48
55
|
return TaskResponseAsync(**r.json()).with_client(self)
|
@@ -50,7 +57,9 @@ class ChunkrAsync(ChunkrBase):
|
|
50
57
|
await self._client.aclose()
|
51
58
|
raise e
|
52
59
|
|
53
|
-
async def update_task(
|
60
|
+
async def update_task(
|
61
|
+
self, task_id: str, config: Configuration
|
62
|
+
) -> TaskResponseAsync:
|
54
63
|
if not self._client or self._client.is_closed:
|
55
64
|
self._client = httpx.AsyncClient()
|
56
65
|
try:
|
@@ -58,49 +67,46 @@ class ChunkrAsync(ChunkrBase):
|
|
58
67
|
r = await self._client.patch(
|
59
68
|
f"{self.url}/api/v1/task/{task_id}",
|
60
69
|
files=files,
|
61
|
-
headers=self._headers()
|
70
|
+
headers=self._headers(),
|
62
71
|
)
|
63
|
-
|
72
|
+
|
64
73
|
r.raise_for_status()
|
65
74
|
return TaskResponseAsync(**r.json()).with_client(self)
|
66
75
|
except Exception as e:
|
67
76
|
await self._client.aclose()
|
68
77
|
raise e
|
69
|
-
|
78
|
+
|
70
79
|
async def get_task(self, task_id: str) -> TaskResponseAsync:
|
71
80
|
if not self._client or self._client.is_closed:
|
72
81
|
self._client = httpx.AsyncClient()
|
73
82
|
try:
|
74
83
|
r = await self._client.get(
|
75
|
-
f"{self.url}/api/v1/task/{task_id}",
|
76
|
-
headers=self._headers()
|
84
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
77
85
|
)
|
78
86
|
r.raise_for_status()
|
79
87
|
return TaskResponseAsync(**r.json()).with_client(self)
|
80
88
|
except Exception as e:
|
81
89
|
await self._client.aclose()
|
82
90
|
raise e
|
83
|
-
|
91
|
+
|
84
92
|
async def delete_task(self, task_id: str) -> None:
|
85
93
|
if not self._client or self._client.is_closed:
|
86
94
|
self._client = httpx.AsyncClient()
|
87
95
|
try:
|
88
96
|
r = await self._client.delete(
|
89
|
-
f"{self.url}/api/v1/task/{task_id}",
|
90
|
-
headers=self._headers()
|
97
|
+
f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
|
91
98
|
)
|
92
99
|
r.raise_for_status()
|
93
100
|
except Exception as e:
|
94
101
|
await self._client.aclose()
|
95
102
|
raise e
|
96
|
-
|
103
|
+
|
97
104
|
async def cancel_task(self, task_id: str) -> None:
|
98
105
|
if not self._client or self._client.is_closed:
|
99
106
|
self._client = httpx.AsyncClient()
|
100
107
|
try:
|
101
108
|
r = await self._client.get(
|
102
|
-
f"{self.url}/api/v1/task/{task_id}/cancel",
|
103
|
-
headers=self._headers()
|
109
|
+
f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
|
104
110
|
)
|
105
111
|
r.raise_for_status()
|
106
112
|
except Exception as e:
|
@@ -111,4 +117,4 @@ class ChunkrAsync(ChunkrBase):
|
|
111
117
|
return self
|
112
118
|
|
113
119
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
114
|
-
await self._client.aclose()
|
120
|
+
await self._client.aclose()
|
@@ -9,31 +9,31 @@ from pathlib import Path
|
|
9
9
|
from PIL import Image
|
10
10
|
from typing import BinaryIO, Union
|
11
11
|
|
12
|
+
|
12
13
|
class ChunkrBase(HeadersMixin):
|
13
14
|
"""Base class with shared functionality for Chunkr API clients."""
|
14
15
|
|
15
16
|
def __init__(self, url: str = None, api_key: str = None):
|
16
17
|
load_dotenv()
|
17
|
-
self.url = (
|
18
|
-
|
19
|
-
os.getenv('CHUNKR_URL') or
|
20
|
-
'https://api.chunkr.ai'
|
21
|
-
)
|
22
|
-
self._api_key = (
|
23
|
-
api_key or
|
24
|
-
os.getenv('CHUNKR_API_KEY')
|
25
|
-
)
|
18
|
+
self.url = url or os.getenv("CHUNKR_URL") or "https://api.chunkr.ai"
|
19
|
+
self._api_key = api_key or os.getenv("CHUNKR_API_KEY")
|
26
20
|
if not self._api_key:
|
27
|
-
raise ValueError(
|
28
|
-
|
21
|
+
raise ValueError(
|
22
|
+
"API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai"
|
23
|
+
)
|
24
|
+
|
29
25
|
self.url = self.url.rstrip("/")
|
30
26
|
|
31
27
|
@abstractmethod
|
32
|
-
def upload(
|
28
|
+
def upload(
|
29
|
+
self,
|
30
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
31
|
+
config: Configuration = None,
|
32
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
33
33
|
"""Upload a file and wait for processing to complete.
|
34
34
|
|
35
35
|
Args:
|
36
|
-
file: The file to upload.
|
36
|
+
file: The file to upload.
|
37
37
|
config: Configuration options for processing. Optional.
|
38
38
|
|
39
39
|
Examples:
|
@@ -44,7 +44,7 @@ class ChunkrBase(HeadersMixin):
|
|
44
44
|
# Upload from opened file
|
45
45
|
with open("document.pdf", "rb") as f:
|
46
46
|
await chunkr.upload(f)
|
47
|
-
|
47
|
+
|
48
48
|
# Upload from URL
|
49
49
|
await chunkr.upload("https://example.com/document.pdf")
|
50
50
|
|
@@ -60,11 +60,13 @@ class ChunkrBase(HeadersMixin):
|
|
60
60
|
TaskResponse: The completed task response
|
61
61
|
"""
|
62
62
|
pass
|
63
|
-
|
63
|
+
|
64
64
|
@abstractmethod
|
65
|
-
def update(
|
65
|
+
def update(
|
66
|
+
self, task_id: str, config: Configuration
|
67
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
66
68
|
"""Update a task by its ID and wait for processing to complete.
|
67
|
-
|
69
|
+
|
68
70
|
Args:
|
69
71
|
task_id: The ID of the task to update
|
70
72
|
config: Configuration options for processing. Optional.
|
@@ -75,7 +77,11 @@ class ChunkrBase(HeadersMixin):
|
|
75
77
|
pass
|
76
78
|
|
77
79
|
@abstractmethod
|
78
|
-
def create_task(
|
80
|
+
def create_task(
|
81
|
+
self,
|
82
|
+
file: Union[str, Path, BinaryIO, Image.Image],
|
83
|
+
config: Configuration = None,
|
84
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
79
85
|
"""Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
|
80
86
|
|
81
87
|
Args:
|
@@ -90,7 +96,7 @@ class ChunkrBase(HeadersMixin):
|
|
90
96
|
# Upload from opened file
|
91
97
|
with open("document.pdf", "rb") as f:
|
92
98
|
task = await chunkr.create_task(f)
|
93
|
-
|
99
|
+
|
94
100
|
# Upload from URL
|
95
101
|
task = await chunkr.create_task("https://example.com/document.pdf")
|
96
102
|
|
@@ -109,9 +115,11 @@ class ChunkrBase(HeadersMixin):
|
|
109
115
|
pass
|
110
116
|
|
111
117
|
@abstractmethod
|
112
|
-
def update_task(
|
118
|
+
def update_task(
|
119
|
+
self, task_id: str, config: Configuration
|
120
|
+
) -> Union[TaskResponse, TaskResponseAsync]:
|
113
121
|
"""Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
|
114
|
-
|
122
|
+
|
115
123
|
Args:
|
116
124
|
task_id: The ID of the task to update
|
117
125
|
config: Configuration options for processing. Optional.
|
@@ -120,11 +128,11 @@ class ChunkrBase(HeadersMixin):
|
|
120
128
|
TaskResponse: The updated task response
|
121
129
|
"""
|
122
130
|
pass
|
123
|
-
|
131
|
+
|
124
132
|
@abstractmethod
|
125
133
|
def get_task(self, task_id: str) -> Union[TaskResponse, TaskResponseAsync]:
|
126
134
|
"""Get a task response by its ID.
|
127
|
-
|
135
|
+
|
128
136
|
Args:
|
129
137
|
task_id: The ID of the task to get
|
130
138
|
|
@@ -136,18 +144,17 @@ class ChunkrBase(HeadersMixin):
|
|
136
144
|
@abstractmethod
|
137
145
|
def delete_task(self, task_id: str) -> None:
|
138
146
|
"""Delete a task by its ID.
|
139
|
-
|
147
|
+
|
140
148
|
Args:
|
141
149
|
task_id: The ID of the task to delete
|
142
150
|
"""
|
143
151
|
pass
|
144
|
-
|
152
|
+
|
145
153
|
@abstractmethod
|
146
154
|
def cancel_task(self, task_id: str) -> None:
|
147
155
|
"""Cancel a task by its ID.
|
148
|
-
|
156
|
+
|
149
157
|
Args:
|
150
158
|
task_id: The ID of the task to cancel
|
151
159
|
"""
|
152
160
|
pass
|
153
|
-
|