chunkr-ai 0.0.24__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/chunkr.py CHANGED
@@ -2,8 +2,8 @@ from pathlib import Path
2
2
  from PIL import Image
3
3
  from typing import Union, BinaryIO
4
4
 
5
- from .config import Configuration
6
- from .decorators import anywhere, ensure_client
5
+ from .configuration import Configuration
6
+ from .decorators import anywhere, ensure_client, retry_on_429
7
7
  from .misc import prepare_upload_data
8
8
  from .task_response import TaskResponse
9
9
  from .chunkr_base import ChunkrBase
@@ -29,6 +29,7 @@ class Chunkr(ChunkrBase):
29
29
 
30
30
  @anywhere()
31
31
  @ensure_client()
32
+ @retry_on_429()
32
33
  async def create_task(
33
34
  self,
34
35
  file: Union[str, Path, BinaryIO, Image.Image],
@@ -39,10 +40,11 @@ class Chunkr(ChunkrBase):
39
40
  f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
41
  )
41
42
  r.raise_for_status()
42
- return TaskResponse(**r.json()).with_client(self)
43
+ return TaskResponse(**r.json()).with_client(self, True, False)
43
44
 
44
45
  @anywhere()
45
46
  @ensure_client()
47
+ @retry_on_429()
46
48
  async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
49
  files = await prepare_upload_data(None, config, self._client)
48
50
  r = await self._client.patch(
@@ -51,16 +53,22 @@ class Chunkr(ChunkrBase):
51
53
  headers=self._headers(),
52
54
  )
53
55
  r.raise_for_status()
54
- return TaskResponse(**r.json()).with_client(self)
56
+ return TaskResponse(**r.json()).with_client(self, True, False)
55
57
 
56
58
  @anywhere()
57
59
  @ensure_client()
58
- async def get_task(self, task_id: str) -> TaskResponse:
60
+ async def get_task(self, task_id: str, include_chunks: bool = True, base64_urls: bool = False) -> TaskResponse:
61
+ params = {
62
+ "base64_urls": str(base64_urls).lower(),
63
+ "include_chunks": str(include_chunks).lower()
64
+ }
59
65
  r = await self._client.get(
60
- f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
66
+ f"{self.url}/api/v1/task/{task_id}",
67
+ params=params,
68
+ headers=self._headers()
61
69
  )
62
70
  r.raise_for_status()
63
- return TaskResponse(**r.json()).with_client(self)
71
+ return TaskResponse(**r.json()).with_client(self, include_chunks, base64_urls)
64
72
 
65
73
  @anywhere()
66
74
  @ensure_client()
@@ -1,4 +1,4 @@
1
- from .config import Configuration
1
+ from .configuration import Configuration
2
2
  from .task_response import TaskResponse
3
3
  from .auth import HeadersMixin
4
4
  from abc import abstractmethod
@@ -139,11 +139,13 @@ class ChunkrBase(HeadersMixin):
139
139
  pass
140
140
 
141
141
  @abstractmethod
142
- def get_task(self, task_id: str) -> TaskResponse:
142
+ def get_task(self, task_id: str, include_chunks: bool = True, base64_urls: bool = False) -> TaskResponse:
143
143
  """Get a task response by its ID.
144
144
 
145
145
  Args:
146
146
  task_id: The ID of the task to get
147
+ include_chunks: Whether to include chunks in the output response. Defaults to True.
148
+ base64_urls: Whether to return base64 encoded URLs. If false, the URLs will be returned as presigned URLs. Defaults to False.
147
149
 
148
150
  Returns:
149
151
  TaskResponse: The task response
@@ -1,6 +1,6 @@
1
- from pydantic import BaseModel, Field, model_validator, ConfigDict
1
+ from pydantic import BaseModel, Field, ConfigDict
2
2
  from enum import Enum
3
- from typing import Optional, List, Dict, Union, Type
3
+ from typing import Any, List, Optional
4
4
 
5
5
  class GenerationStrategy(str, Enum):
6
6
  LLM = "LLM"
@@ -37,16 +37,6 @@ class SegmentProcessing(BaseModel):
37
37
  class ChunkProcessing(BaseModel):
38
38
  target_length: Optional[int] = None
39
39
 
40
- class Property(BaseModel):
41
- name: str
42
- prop_type: str
43
- description: Optional[str] = None
44
- default: Optional[str] = None
45
-
46
- class JsonSchema(BaseModel):
47
- title: str
48
- properties: List[Property]
49
-
50
40
  class OcrStrategy(str, Enum):
51
41
  ALL = "All"
52
42
  AUTO = "Auto"
@@ -98,9 +88,6 @@ class Chunk(BaseModel):
98
88
  chunk_length: int
99
89
  segments: List[Segment]
100
90
 
101
- class ExtractedJson(BaseModel):
102
- data: Dict
103
-
104
91
  class OutputResponse(BaseModel):
105
92
  chunks: List[Chunk]
106
93
  file_name: Optional[str]
@@ -118,7 +105,6 @@ class Configuration(BaseModel):
118
105
  chunk_processing: Optional[ChunkProcessing] = None
119
106
  expires_in: Optional[int] = None
120
107
  high_resolution: Optional[bool] = None
121
- model: Optional[Model] = None
122
108
  ocr_strategy: Optional[OcrStrategy] = None
123
109
  segment_processing: Optional[SegmentProcessing] = None
124
110
  segmentation_strategy: Optional[SegmentationStrategy] = None
@@ -126,16 +112,10 @@ class Configuration(BaseModel):
126
112
 
127
113
  class OutputConfiguration(Configuration):
128
114
  input_file_url: Optional[str] = None
129
- json_schema: Optional[Union[JsonSchema, Type[BaseModel], BaseModel]] = None
130
-
131
- @model_validator(mode="before")
132
- def map_deprecated_fields(cls, values: Dict) -> Dict:
133
- if isinstance(values, dict) and "target_chunk_length" in values:
134
- target_length = values.pop("target_chunk_length")
135
- if target_length is not None:
136
- values["chunk_processing"] = values.get("chunk_processing", {}) or {}
137
- values["chunk_processing"]["target_length"] = target_length
138
- return values
115
+ # Deprecated
116
+ json_schema: Optional[Any] = None
117
+ model: Optional[Model] = None
118
+ target_chunk_length: Optional[int] = None
139
119
 
140
120
  class Status(str, Enum):
141
121
  STARTING = "Starting"
@@ -59,4 +59,34 @@ def require_task() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitabl
59
59
  self._client._client = httpx.AsyncClient()
60
60
  return await async_func(self, *args, **kwargs)
61
61
  return wrapper
62
+ return decorator
63
+
64
+ def retry_on_429(max_retries: int = 10, initial_delay: float = 0.5) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
65
+ """Decorator that retries the request when encountering 429 Too Many Requests errors.
66
+
67
+ Args:
68
+ max_retries: Maximum number of retry attempts (default: 10)
69
+ initial_delay: Delay in seconds before each retry; overridden by the response's Retry-After header when that header parses as a number (default: 0.5)
70
+ """
71
+ def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
72
+ @functools.wraps(async_func)
73
+ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
74
+ retries = 0
75
+ while True:
76
+ try:
77
+ return await async_func(*args, **kwargs)
78
+ except httpx.HTTPStatusError as e:
79
+ if e.response.status_code != 429 or retries >= max_retries:
80
+ raise
81
+ retries += 1
82
+ delay = initial_delay
83
+ # Use Retry-After header if available
84
+ retry_after = e.response.headers.get('Retry-After')
85
+ if retry_after:
86
+ try:
87
+ delay = float(retry_after)
88
+ except (ValueError, TypeError):
89
+ pass
90
+ await asyncio.sleep(delay)
91
+ return wrapper
62
92
  return decorator
chunkr_ai/api/misc.py CHANGED
@@ -1,4 +1,4 @@
1
- from .config import Configuration
1
+ from .configuration import Configuration
2
2
  import io
3
3
  import json
4
4
  from pathlib import Path
@@ -3,10 +3,10 @@ from typing import TypeVar, Optional, Generic
3
3
  from pydantic import BaseModel, PrivateAttr
4
4
  import asyncio
5
5
 
6
- from .config import Configuration, OutputConfiguration, OutputResponse, Status
6
+ from .configuration import Configuration, OutputConfiguration, OutputResponse, Status
7
7
  from .protocol import ChunkrClientProtocol
8
8
  from .misc import prepare_upload_data
9
- from .decorators import anywhere, require_task
9
+ from .decorators import anywhere, require_task, retry_on_429
10
10
 
11
11
  T = TypeVar("T", bound="TaskResponse")
12
12
 
@@ -21,10 +21,14 @@ class TaskResponse(BaseModel, Generic[T]):
21
21
  status: Status
22
22
  task_id: str
23
23
  task_url: Optional[str] = None
24
+ include_chunks: bool = False
25
+ _base64_urls: bool = False
24
26
  _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
25
27
 
26
- def with_client(self, client: ChunkrClientProtocol) -> T:
28
+ def with_client(self, client: ChunkrClientProtocol, include_chunks: bool = False, base64_urls: bool = False) -> T:
27
29
  self._client = client
30
+ self.include_chunks = include_chunks
31
+ self._base64_urls = base64_urls
28
32
  return self
29
33
 
30
34
  def _check_status(self) -> Optional[T]:
@@ -45,11 +49,12 @@ class TaskResponse(BaseModel, Generic[T]):
45
49
  )
46
50
  r.raise_for_status()
47
51
  return r.json()
48
- except (ConnectionError, TimeoutError) as _:
49
- print("Connection error while polling the task, retrying...")
52
+ except (ConnectionError, TimeoutError, OSError) as e:
53
+ print(f"Connection error while polling the task: {str(e)}, retrying...")
50
54
  await asyncio.sleep(0.5)
51
- except Exception:
52
- raise
55
+ return await self._poll_request()
56
+ except Exception as e:
57
+ raise e
53
58
 
54
59
  @anywhere()
55
60
  async def poll(self) -> T:
@@ -64,6 +69,7 @@ class TaskResponse(BaseModel, Generic[T]):
64
69
 
65
70
  @anywhere()
66
71
  @require_task()
72
+ @retry_on_429()
67
73
  async def update(self, config: Configuration) -> T:
68
74
  """Update the task configuration."""
69
75
  f = await prepare_upload_data(None, config, self._client._client)
chunkr_ai/models.py CHANGED
@@ -1,4 +1,4 @@
1
- from .api.config import (
1
+ from .api.configuration import (
2
2
  BoundingBox,
3
3
  Chunk,
4
4
  ChunkProcessing,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.24
3
+ Version: 0.0.25
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -0,0 +1,16 @@
1
+ chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
+ chunkr_ai/models.py,sha256=tOI7ylkhyeFfCLMisk96EPsH4UEcjBx1Mcisxc_AYXI,757
3
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
5
+ chunkr_ai/api/chunkr.py,sha256=VnbuAPlWLqyf8xCCU_kpdybgjVPTwZLarDQoD3uozY0,3065
6
+ chunkr_ai/api/chunkr_base.py,sha256=giW56fL7xxJphdOTpIH52dXxpNt7OdP8pNiPSqbNjGM,5835
7
+ chunkr_ai/api/configuration.py,sha256=0wnrKlUIO7opvV963Gr_S8tlAjpo_IkNmbTi1_FwEug,3751
8
+ chunkr_ai/api/decorators.py,sha256=HSq3vcxOeUJkaWaf7HOvCyg9dWkVo8cG5BrU-jhbhmc,4053
9
+ chunkr_ai/api/misc.py,sha256=5PBI6pvOXr0x-3WieSKLrC8MA0iGPa-IG-5FEZ3vnr0,5724
10
+ chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
+ chunkr_ai/api/task_response.py,sha256=CZIa3w5qPvSZDbDJ-LAtg7OOY91LsruemaXNyO2PymI,4256
12
+ chunkr_ai-0.0.25.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.0.25.dist-info/METADATA,sha256=CG1cO9YX7TpAHwBXgqLDgF9nwmVv30WLsWzfULx06W4,6996
14
+ chunkr_ai-0.0.25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
+ chunkr_ai-0.0.25.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.0.25.dist-info/RECORD,,
chunkr_ai/api/api.py DELETED
File without changes
@@ -1,17 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=MK8FPbWDj1ynvSHaYuslKCPybxLuAlrsVIM3Eym3kKI,750
3
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
- chunkr_ai/api/chunkr.py,sha256=XTXJFs0xjYY3w3N4fSQcxtJFBtNfzFYYkh6nDlFz4cY,2714
7
- chunkr_ai/api/chunkr_base.py,sha256=4SXA-gdZd1w2zZeeMdy4xog0NKOrKjmo6IMvSl9KSBg,5538
8
- chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
- chunkr_ai/api/decorators.py,sha256=y_Z9z0O2XXiX9z6jWDwdbCPdQyMLnjE0pGkJjHQEv_Q,2652
10
- chunkr_ai/api/misc.py,sha256=5Q2K713VPwf3S2519KTzjT9PKhTEBgBMk1d8NNnmpZ0,5717
11
- chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
12
- chunkr_ai/api/task_response.py,sha256=hcHsBgX-2C5Px5Bu0IKk33K_AkqHSEM1Wu2zkcPh9to,3935
13
- chunkr_ai-0.0.24.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
14
- chunkr_ai-0.0.24.dist-info/METADATA,sha256=JyDI8EkFaJQQ7vIo2osHxXmeuNqhQ0UWjgUMHSFIYow,6996
15
- chunkr_ai-0.0.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.24.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.24.dist-info/RECORD,,