chunkr-ai 0.0.24__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/chunkr.py +15 -7
- chunkr_ai/api/chunkr_base.py +4 -2
- chunkr_ai/api/{config.py → configuration.py} +6 -26
- chunkr_ai/api/decorators.py +30 -0
- chunkr_ai/api/misc.py +1 -1
- chunkr_ai/api/task_response.py +13 -7
- chunkr_ai/models.py +1 -1
- {chunkr_ai-0.0.24.dist-info → chunkr_ai-0.0.25.dist-info}/METADATA +1 -1
- chunkr_ai-0.0.25.dist-info/RECORD +16 -0
- chunkr_ai/api/api.py +0 -0
- chunkr_ai-0.0.24.dist-info/RECORD +0 -17
- {chunkr_ai-0.0.24.dist-info → chunkr_ai-0.0.25.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.24.dist-info → chunkr_ai-0.0.25.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.24.dist-info → chunkr_ai-0.0.25.dist-info}/top_level.txt +0 -0
chunkr_ai/api/chunkr.py
CHANGED
@@ -2,8 +2,8 @@ from pathlib import Path
|
|
2
2
|
from PIL import Image
|
3
3
|
from typing import Union, BinaryIO
|
4
4
|
|
5
|
-
from .config import Configuration
|
6
|
-
from .decorators import anywhere, ensure_client
|
5
|
+
from .configuration import Configuration
|
6
|
+
from .decorators import anywhere, ensure_client, retry_on_429
|
7
7
|
from .misc import prepare_upload_data
|
8
8
|
from .task_response import TaskResponse
|
9
9
|
from .chunkr_base import ChunkrBase
|
@@ -29,6 +29,7 @@ class Chunkr(ChunkrBase):
|
|
29
29
|
|
30
30
|
@anywhere()
|
31
31
|
@ensure_client()
|
32
|
+
@retry_on_429()
|
32
33
|
async def create_task(
|
33
34
|
self,
|
34
35
|
file: Union[str, Path, BinaryIO, Image.Image],
|
@@ -39,10 +40,11 @@ class Chunkr(ChunkrBase):
|
|
39
40
|
f"{self.url}/api/v1/task", files=files, headers=self._headers()
|
40
41
|
)
|
41
42
|
r.raise_for_status()
|
42
|
-
return TaskResponse(**r.json()).with_client(self)
|
43
|
+
return TaskResponse(**r.json()).with_client(self, True, False)
|
43
44
|
|
44
45
|
@anywhere()
|
45
46
|
@ensure_client()
|
47
|
+
@retry_on_429()
|
46
48
|
async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
|
47
49
|
files = await prepare_upload_data(None, config, self._client)
|
48
50
|
r = await self._client.patch(
|
@@ -51,16 +53,22 @@ class Chunkr(ChunkrBase):
|
|
51
53
|
headers=self._headers(),
|
52
54
|
)
|
53
55
|
r.raise_for_status()
|
54
|
-
return TaskResponse(**r.json()).with_client(self)
|
56
|
+
return TaskResponse(**r.json()).with_client(self, True, False)
|
55
57
|
|
56
58
|
@anywhere()
|
57
59
|
@ensure_client()
|
58
|
-
async def get_task(self, task_id: str) -> TaskResponse:
|
60
|
+
async def get_task(self, task_id: str, include_chunks: bool = True, base64_urls: bool = False) -> TaskResponse:
|
61
|
+
params = {
|
62
|
+
"base64_urls": str(base64_urls).lower(),
|
63
|
+
"include_chunks": str(include_chunks).lower()
|
64
|
+
}
|
59
65
|
r = await self._client.get(
|
60
|
-
f"{self.url}/api/v1/task/{task_id}",
|
66
|
+
f"{self.url}/api/v1/task/{task_id}",
|
67
|
+
params=params,
|
68
|
+
headers=self._headers()
|
61
69
|
)
|
62
70
|
r.raise_for_status()
|
63
|
-
return TaskResponse(**r.json()).with_client(self)
|
71
|
+
return TaskResponse(**r.json()).with_client(self, include_chunks, base64_urls)
|
64
72
|
|
65
73
|
@anywhere()
|
66
74
|
@ensure_client()
|
chunkr_ai/api/chunkr_base.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .config import Configuration
|
1
|
+
from .configuration import Configuration
|
2
2
|
from .task_response import TaskResponse
|
3
3
|
from .auth import HeadersMixin
|
4
4
|
from abc import abstractmethod
|
@@ -139,11 +139,13 @@ class ChunkrBase(HeadersMixin):
|
|
139
139
|
pass
|
140
140
|
|
141
141
|
@abstractmethod
|
142
|
-
def get_task(self, task_id: str) -> TaskResponse:
|
142
|
+
def get_task(self, task_id: str, include_chunks: bool = True, base64_urls: bool = False) -> TaskResponse:
|
143
143
|
"""Get a task response by its ID.
|
144
144
|
|
145
145
|
Args:
|
146
146
|
task_id: The ID of the task to get
|
147
|
+
include_chunks: Whether to include chunks in the output response. Defaults to True.
|
148
|
+
base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as presigned URLs. Defaults to False.
|
147
149
|
|
148
150
|
Returns:
|
149
151
|
TaskResponse: The task response
|
chunkr_ai/api/{config.py → configuration.py}
@@ -1,6 +1,6 @@
|
|
1
|
-
from pydantic import BaseModel, Field,
|
1
|
+
from pydantic import BaseModel, Field, ConfigDict
|
2
2
|
from enum import Enum
|
3
|
-
from typing import
|
3
|
+
from typing import Any, List, Optional
|
4
4
|
|
5
5
|
class GenerationStrategy(str, Enum):
|
6
6
|
LLM = "LLM"
|
@@ -37,16 +37,6 @@ class SegmentProcessing(BaseModel):
|
|
37
37
|
class ChunkProcessing(BaseModel):
|
38
38
|
target_length: Optional[int] = None
|
39
39
|
|
40
|
-
class Property(BaseModel):
|
41
|
-
name: str
|
42
|
-
prop_type: str
|
43
|
-
description: Optional[str] = None
|
44
|
-
default: Optional[str] = None
|
45
|
-
|
46
|
-
class JsonSchema(BaseModel):
|
47
|
-
title: str
|
48
|
-
properties: List[Property]
|
49
|
-
|
50
40
|
class OcrStrategy(str, Enum):
|
51
41
|
ALL = "All"
|
52
42
|
AUTO = "Auto"
|
@@ -98,9 +88,6 @@ class Chunk(BaseModel):
|
|
98
88
|
chunk_length: int
|
99
89
|
segments: List[Segment]
|
100
90
|
|
101
|
-
class ExtractedJson(BaseModel):
|
102
|
-
data: Dict
|
103
|
-
|
104
91
|
class OutputResponse(BaseModel):
|
105
92
|
chunks: List[Chunk]
|
106
93
|
file_name: Optional[str]
|
@@ -118,7 +105,6 @@ class Configuration(BaseModel):
|
|
118
105
|
chunk_processing: Optional[ChunkProcessing] = None
|
119
106
|
expires_in: Optional[int] = None
|
120
107
|
high_resolution: Optional[bool] = None
|
121
|
-
model: Optional[Model] = None
|
122
108
|
ocr_strategy: Optional[OcrStrategy] = None
|
123
109
|
segment_processing: Optional[SegmentProcessing] = None
|
124
110
|
segmentation_strategy: Optional[SegmentationStrategy] = None
|
@@ -126,16 +112,10 @@ class Configuration(BaseModel):
|
|
126
112
|
|
127
113
|
class OutputConfiguration(Configuration):
|
128
114
|
input_file_url: Optional[str] = None
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
if isinstance(values, dict) and "target_chunk_length" in values:
|
134
|
-
target_length = values.pop("target_chunk_length")
|
135
|
-
if target_length is not None:
|
136
|
-
values["chunk_processing"] = values.get("chunk_processing", {}) or {}
|
137
|
-
values["chunk_processing"]["target_length"] = target_length
|
138
|
-
return values
|
115
|
+
# Deprecated
|
116
|
+
json_schema: Optional[Any] = None
|
117
|
+
model: Optional[Model] = None
|
118
|
+
target_chunk_length: Optional[int] = None
|
139
119
|
|
140
120
|
class Status(str, Enum):
|
141
121
|
STARTING = "Starting"
|
chunkr_ai/api/decorators.py
CHANGED
@@ -59,4 +59,34 @@ def require_task() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitabl
|
|
59
59
|
self._client._client = httpx.AsyncClient()
|
60
60
|
return await async_func(self, *args, **kwargs)
|
61
61
|
return wrapper
|
62
|
+
return decorator
|
63
|
+
|
64
|
+
def retry_on_429(max_retries: int = 10, initial_delay: float = 0.5) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
|
65
|
+
"""Decorator that retries the request when encountering 429 Too Many Requests errors.
|
66
|
+
|
67
|
+
Args:
|
68
|
+
max_retries: Maximum number of retry attempts (default: 3)
|
69
|
+
initial_delay: Initial delay in seconds, will be exponentially increased (default: 1.0)
|
70
|
+
"""
|
71
|
+
def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
|
72
|
+
@functools.wraps(async_func)
|
73
|
+
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
74
|
+
retries = 0
|
75
|
+
while True:
|
76
|
+
try:
|
77
|
+
return await async_func(*args, **kwargs)
|
78
|
+
except httpx.HTTPStatusError as e:
|
79
|
+
if e.response.status_code != 429 or retries >= max_retries:
|
80
|
+
raise
|
81
|
+
retries += 1
|
82
|
+
delay = initial_delay
|
83
|
+
# Use Retry-After header if available
|
84
|
+
retry_after = e.response.headers.get('Retry-After')
|
85
|
+
if retry_after:
|
86
|
+
try:
|
87
|
+
delay = float(retry_after)
|
88
|
+
except (ValueError, TypeError):
|
89
|
+
pass
|
90
|
+
await asyncio.sleep(delay)
|
91
|
+
return wrapper
|
62
92
|
return decorator
|
chunkr_ai/api/misc.py
CHANGED
chunkr_ai/api/task_response.py
CHANGED
@@ -3,10 +3,10 @@ from typing import TypeVar, Optional, Generic
|
|
3
3
|
from pydantic import BaseModel, PrivateAttr
|
4
4
|
import asyncio
|
5
5
|
|
6
|
-
from .config import Configuration, OutputConfiguration, OutputResponse, Status
|
6
|
+
from .configuration import Configuration, OutputConfiguration, OutputResponse, Status
|
7
7
|
from .protocol import ChunkrClientProtocol
|
8
8
|
from .misc import prepare_upload_data
|
9
|
-
from .decorators import anywhere, require_task
|
9
|
+
from .decorators import anywhere, require_task, retry_on_429
|
10
10
|
|
11
11
|
T = TypeVar("T", bound="TaskResponse")
|
12
12
|
|
@@ -21,10 +21,14 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
21
21
|
status: Status
|
22
22
|
task_id: str
|
23
23
|
task_url: Optional[str] = None
|
24
|
+
include_chunks: bool = False
|
25
|
+
_base64_urls: bool = False
|
24
26
|
_client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
|
25
27
|
|
26
|
-
def with_client(self, client: ChunkrClientProtocol) -> T:
|
28
|
+
def with_client(self, client: ChunkrClientProtocol, include_chunks: bool = False, base64_urls: bool = False) -> T:
|
27
29
|
self._client = client
|
30
|
+
self.include_chunks = include_chunks
|
31
|
+
self._base64_urls = base64_urls
|
28
32
|
return self
|
29
33
|
|
30
34
|
def _check_status(self) -> Optional[T]:
|
@@ -45,11 +49,12 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
45
49
|
)
|
46
50
|
r.raise_for_status()
|
47
51
|
return r.json()
|
48
|
-
except (ConnectionError, TimeoutError) as
|
49
|
-
print("Connection error while polling the task, retrying...")
|
52
|
+
except (ConnectionError, TimeoutError, OSError) as e:
|
53
|
+
print(f"Connection error while polling the task: {str(e)}, retrying...")
|
50
54
|
await asyncio.sleep(0.5)
|
51
|
-
|
52
|
-
|
55
|
+
return await self._poll_request()
|
56
|
+
except Exception as e:
|
57
|
+
raise e
|
53
58
|
|
54
59
|
@anywhere()
|
55
60
|
async def poll(self) -> T:
|
@@ -64,6 +69,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
64
69
|
|
65
70
|
@anywhere()
|
66
71
|
@require_task()
|
72
|
+
@retry_on_429()
|
67
73
|
async def update(self, config: Configuration) -> T:
|
68
74
|
"""Update the task configuration."""
|
69
75
|
f = await prepare_upload_data(None, config, self._client._client)
|
chunkr_ai/models.py
CHANGED
chunkr_ai-0.0.25.dist-info/RECORD
@@ -0,0 +1,16 @@
|
|
1
|
+
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
+
chunkr_ai/models.py,sha256=tOI7ylkhyeFfCLMisk96EPsH4UEcjBx1Mcisxc_AYXI,757
|
3
|
+
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
5
|
+
chunkr_ai/api/chunkr.py,sha256=VnbuAPlWLqyf8xCCU_kpdybgjVPTwZLarDQoD3uozY0,3065
|
6
|
+
chunkr_ai/api/chunkr_base.py,sha256=giW56fL7xxJphdOTpIH52dXxpNt7OdP8pNiPSqbNjGM,5835
|
7
|
+
chunkr_ai/api/configuration.py,sha256=0wnrKlUIO7opvV963Gr_S8tlAjpo_IkNmbTi1_FwEug,3751
|
8
|
+
chunkr_ai/api/decorators.py,sha256=HSq3vcxOeUJkaWaf7HOvCyg9dWkVo8cG5BrU-jhbhmc,4053
|
9
|
+
chunkr_ai/api/misc.py,sha256=5PBI6pvOXr0x-3WieSKLrC8MA0iGPa-IG-5FEZ3vnr0,5724
|
10
|
+
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
|
+
chunkr_ai/api/task_response.py,sha256=CZIa3w5qPvSZDbDJ-LAtg7OOY91LsruemaXNyO2PymI,4256
|
12
|
+
chunkr_ai-0.0.25.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.0.25.dist-info/METADATA,sha256=CG1cO9YX7TpAHwBXgqLDgF9nwmVv30WLsWzfULx06W4,6996
|
14
|
+
chunkr_ai-0.0.25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
15
|
+
chunkr_ai-0.0.25.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.0.25.dist-info/RECORD,,
|
chunkr_ai/api/api.py
DELETED
File without changes
|
chunkr_ai-0.0.24.dist-info/RECORD
@@ -1,17 +0,0 @@
|
|
1
|
-
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
-
chunkr_ai/models.py,sha256=MK8FPbWDj1ynvSHaYuslKCPybxLuAlrsVIM3Eym3kKI,750
|
3
|
-
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
6
|
-
chunkr_ai/api/chunkr.py,sha256=XTXJFs0xjYY3w3N4fSQcxtJFBtNfzFYYkh6nDlFz4cY,2714
|
7
|
-
chunkr_ai/api/chunkr_base.py,sha256=4SXA-gdZd1w2zZeeMdy4xog0NKOrKjmo6IMvSl9KSBg,5538
|
8
|
-
chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
|
9
|
-
chunkr_ai/api/decorators.py,sha256=y_Z9z0O2XXiX9z6jWDwdbCPdQyMLnjE0pGkJjHQEv_Q,2652
|
10
|
-
chunkr_ai/api/misc.py,sha256=5Q2K713VPwf3S2519KTzjT9PKhTEBgBMk1d8NNnmpZ0,5717
|
11
|
-
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
12
|
-
chunkr_ai/api/task_response.py,sha256=hcHsBgX-2C5Px5Bu0IKk33K_AkqHSEM1Wu2zkcPh9to,3935
|
13
|
-
chunkr_ai-0.0.24.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
14
|
-
chunkr_ai-0.0.24.dist-info/METADATA,sha256=JyDI8EkFaJQQ7vIo2osHxXmeuNqhQ0UWjgUMHSFIYow,6996
|
15
|
-
chunkr_ai-0.0.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
16
|
-
chunkr_ai-0.0.24.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
17
|
-
chunkr_ai-0.0.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|