chunkr-ai 0.0.17__tar.gz → 0.0.18__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. {chunkr_ai-0.0.17/src/chunkr_ai.egg-info → chunkr_ai-0.0.18}/PKG-INFO +1 -2
  2. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/pyproject.toml +1 -2
  3. chunkr_ai-0.0.18/src/chunkr_ai/__init__.py +3 -0
  4. chunkr_ai-0.0.18/src/chunkr_ai/api/chunkr.py +85 -0
  5. chunkr_ai-0.0.17/src/chunkr_ai/api/base.py → chunkr_ai-0.0.18/src/chunkr_ai/api/chunkr_base.py +117 -6
  6. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/config.py +18 -45
  7. chunkr_ai-0.0.18/src/chunkr_ai/api/decorators.py +58 -0
  8. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/misc.py +0 -2
  9. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/protocol.py +0 -2
  10. chunkr_ai-0.0.18/src/chunkr_ai/api/task_response.py +119 -0
  11. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/models.py +3 -12
  12. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18/src/chunkr_ai.egg-info}/PKG-INFO +1 -2
  13. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai.egg-info/SOURCES.txt +2 -6
  14. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai.egg-info/requires.txt +0 -1
  15. chunkr_ai-0.0.18/tests/test_chunkr.py +211 -0
  16. chunkr_ai-0.0.17/src/chunkr_ai/__init__.py +0 -4
  17. chunkr_ai-0.0.17/src/chunkr_ai/api/chunkr.py +0 -78
  18. chunkr_ai-0.0.17/src/chunkr_ai/api/chunkr_async.py +0 -120
  19. chunkr_ai-0.0.17/src/chunkr_ai/api/chunkr_base.py +0 -160
  20. chunkr_ai-0.0.17/src/chunkr_ai/api/schema.py +0 -136
  21. chunkr_ai-0.0.17/src/chunkr_ai/api/task.py +0 -66
  22. chunkr_ai-0.0.17/src/chunkr_ai/api/task_async.py +0 -69
  23. chunkr_ai-0.0.17/src/chunkr_ai/api/task_base.py +0 -85
  24. chunkr_ai-0.0.17/tests/test_chunkr.py +0 -436
  25. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/LICENSE +0 -0
  26. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/README.md +0 -0
  27. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/setup.cfg +0 -0
  28. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/__init__.py +0 -0
  29. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/api.py +0 -0
  30. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai/api/auth.py +0 -0
  31. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  32. {chunkr_ai-0.0.17 → chunkr_ai-0.0.18}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.17
3
+ Version: 0.0.18
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -11,7 +11,6 @@ Requires-Dist: pillow>=10.0.0
11
11
  Requires-Dist: pydantic>=2.0.0
12
12
  Requires-Dist: pytest-asyncio>=0.21.0
13
13
  Requires-Dist: python-dotenv>=0.19.0
14
- Requires-Dist: requests>=2.28.0
15
14
  Provides-Extra: test
16
15
  Requires-Dist: pytest>=7.0.0; extra == "test"
17
16
  Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.17"
7
+ version = "0.0.18"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -16,7 +16,6 @@ dependencies = [
16
16
  "pydantic>=2.0.0",
17
17
  "pytest-asyncio>=0.21.0",
18
18
  "python-dotenv>=0.19.0",
19
- "requests>=2.28.0",
20
19
  ]
21
20
 
22
21
  [project.optional-dependencies]
@@ -0,0 +1,3 @@
1
+ from .api.chunkr import Chunkr
2
+
3
+ __all__ = ["Chunkr"]
@@ -0,0 +1,85 @@
1
+ from pathlib import Path
2
+ from PIL import Image
3
+ from typing import Union, BinaryIO
4
+
5
+ from .config import Configuration
6
+ from .decorators import anywhere, ensure_client
7
+ from .misc import prepare_upload_data
8
+ from .task_response import TaskResponse
9
+ from .chunkr_base import ChunkrBase
10
+
11
+ class Chunkr(ChunkrBase):
12
+ """Chunkr API client that works in both sync and async contexts"""
13
+
14
+ @anywhere()
15
+ @ensure_client()
16
+ async def upload(
17
+ self,
18
+ file: Union[str, Path, BinaryIO, Image.Image],
19
+ config: Configuration = None,
20
+ ) -> TaskResponse:
21
+ task = await self.create_task(file, config)
22
+ return await task.poll()
23
+
24
+ @anywhere()
25
+ @ensure_client()
26
+ async def update(self, task_id: str, config: Configuration) -> TaskResponse:
27
+ task = await self.update_task(task_id, config)
28
+ return await task.poll()
29
+
30
+ @anywhere()
31
+ @ensure_client()
32
+ async def create_task(
33
+ self,
34
+ file: Union[str, Path, BinaryIO, Image.Image],
35
+ config: Configuration = None,
36
+ ) -> TaskResponse:
37
+ files = prepare_upload_data(file, config)
38
+ r = await self._client.post(
39
+ f"{self.url}/api/v1/task", files=files, headers=self._headers()
40
+ )
41
+ r.raise_for_status()
42
+ return TaskResponse(**r.json()).with_client(self)
43
+
44
+ @anywhere()
45
+ @ensure_client()
46
+ async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
47
+ files = prepare_upload_data(None, config)
48
+ r = await self._client.patch(
49
+ f"{self.url}/api/v1/task/{task_id}",
50
+ files=files,
51
+ headers=self._headers(),
52
+ )
53
+ r.raise_for_status()
54
+ return TaskResponse(**r.json()).with_client(self)
55
+
56
+ @anywhere()
57
+ @ensure_client()
58
+ async def get_task(self, task_id: str) -> TaskResponse:
59
+ r = await self._client.get(
60
+ f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
61
+ )
62
+ r.raise_for_status()
63
+ return TaskResponse(**r.json()).with_client(self)
64
+
65
+ @anywhere()
66
+ @ensure_client()
67
+ async def delete_task(self, task_id: str) -> None:
68
+ r = await self._client.delete(
69
+ f"{self.url}/api/v1/task/{task_id}", headers=self._headers()
70
+ )
71
+ r.raise_for_status()
72
+
73
+ @ensure_client()
74
+ @anywhere()
75
+ async def cancel_task(self, task_id: str) -> None:
76
+ r = await self._client.get(
77
+ f"{self.url}/api/v1/task/{task_id}/cancel", headers=self._headers()
78
+ )
79
+ r.raise_for_status()
80
+
81
+ @anywhere()
82
+ async def close(self):
83
+ if self._client:
84
+ await self._client.aclose()
85
+ self._client = None
@@ -1,8 +1,9 @@
1
1
  from .config import Configuration
2
- from .task import TaskResponse
2
+ from .task_response import TaskResponse
3
3
  from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
+ import httpx
6
7
  import io
7
8
  import json
8
9
  import os
@@ -25,6 +26,7 @@ class ChunkrBase(HeadersMixin):
25
26
  )
26
27
 
27
28
  self.url = self.url.rstrip("/")
29
+ self._client = httpx.AsyncClient()
28
30
 
29
31
  def _prepare_file(
30
32
  self, file: Union[str, Path, BinaryIO, Image.Image]
@@ -158,19 +160,100 @@ class ChunkrBase(HeadersMixin):
158
160
  ) -> TaskResponse:
159
161
  """Upload a file and wait for processing to complete.
160
162
 
161
- Must be implemented by subclasses.
163
+ Args:
164
+ file: The file to upload.
165
+ config: Configuration options for processing. Optional.
166
+
167
+ Examples:
168
+ ```python
169
+ # Upload from file path
170
+ await chunkr.upload("document.pdf")
171
+
172
+ # Upload from opened file
173
+ with open("document.pdf", "rb") as f:
174
+ await chunkr.upload(f)
175
+
176
+ # Upload from URL
177
+ await chunkr.upload("https://example.com/document.pdf")
178
+
179
+ # Upload from base64 string (must include MIME type header)
180
+ await chunkr.upload("data:application/pdf;base64,JVBERi0...")
181
+
182
+ # Upload an image
183
+ from PIL import Image
184
+ img = Image.open("photo.jpg")
185
+ await chunkr.upload(img)
186
+ ```
187
+ Returns:
188
+ TaskResponse: The completed task response
162
189
  """
163
190
  pass
164
191
 
165
192
  @abstractmethod
166
- def start_upload(
193
+ def update(
194
+ self, task_id: str, config: Configuration
195
+ ) -> TaskResponse:
196
+ """Update a task by its ID and wait for processing to complete.
197
+
198
+ Args:
199
+ task_id: The ID of the task to update
200
+ config: Configuration options for processing.
201
+
202
+ Returns:
203
+ TaskResponse: The updated task response
204
+ """
205
+ pass
206
+
207
+ @abstractmethod
208
+ def create_task(
167
209
  self,
168
210
  file: Union[str, Path, BinaryIO, Image.Image],
169
211
  config: Configuration = None,
170
212
  ) -> TaskResponse:
171
- """Upload a file for processing and immediately return the task response.
213
+ """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
214
+
215
+ Args:
216
+ file: The file to upload.
217
+ config: Configuration options for processing. Optional.
218
+
219
+ Examples:
220
+ ```python
221
+ # Upload from file path
222
+ task = await chunkr.create_task("document.pdf")
172
223
 
173
- Must be implemented by subclasses.
224
+ # Upload from opened file
225
+ with open("document.pdf", "rb") as f:
226
+ task = await chunkr.create_task(f)
227
+
228
+ # Upload from URL
229
+ task = await chunkr.create_task("https://example.com/document.pdf")
230
+
231
+ # Upload from base64 string (must include MIME type header)
232
+ task = await chunkr.create_task("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
233
+
234
+ # Upload an image
235
+ from PIL import Image
236
+ img = Image.open("photo.jpg")
237
+ task = await chunkr.create_task(img)
238
+
239
+ # Wait for the task to complete - this can be done when needed
240
+ await task.poll()
241
+ ```
242
+ """
243
+ pass
244
+
245
+ @abstractmethod
246
+ def update_task(
247
+ self, task_id: str, config: Configuration
248
+ ) -> TaskResponse:
249
+ """Update a task by its ID and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`.
250
+
251
+ Args:
252
+ task_id: The ID of the task to update
253
+ config: Configuration options for processing.
254
+
255
+ Returns:
256
+ TaskResponse: The updated task response
174
257
  """
175
258
  pass
176
259
 
@@ -178,6 +261,34 @@ class ChunkrBase(HeadersMixin):
178
261
  def get_task(self, task_id: str) -> TaskResponse:
179
262
  """Get a task response by its ID.
180
263
 
181
- Must be implemented by subclasses.
264
+ Args:
265
+ task_id: The ID of the task to get
266
+
267
+ Returns:
268
+ TaskResponse: The task response
269
+ """
270
+ pass
271
+
272
+ @abstractmethod
273
+ def delete_task(self, task_id: str) -> None:
274
+ """Delete a task by its ID.
275
+
276
+ Args:
277
+ task_id: The ID of the task to delete
278
+ """
279
+ pass
280
+
281
+ @abstractmethod
282
+ def cancel_task(self, task_id: str) -> None:
283
+ """Cancel a task by its ID.
284
+
285
+ Args:
286
+ task_id: The ID of the task to cancel
182
287
  """
183
288
  pass
289
+
290
+ @abstractmethod
291
+ def close(self) -> None:
292
+ """Close the client connection.
293
+ This should be called when you're done using the client to properly clean up resources."""
294
+ pass
@@ -1,26 +1,21 @@
1
1
  from pydantic import BaseModel, Field, model_validator, ConfigDict
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict, Union, Type
4
- from .schema import from_pydantic
5
-
6
4
 
7
5
  class GenerationStrategy(str, Enum):
8
6
  LLM = "LLM"
9
7
  AUTO = "Auto"
10
8
 
11
-
12
9
  class CroppingStrategy(str, Enum):
13
10
  ALL = "All"
14
11
  AUTO = "Auto"
15
12
 
16
-
17
13
  class GenerationConfig(BaseModel):
18
14
  html: Optional[GenerationStrategy] = None
19
15
  llm: Optional[str] = None
20
16
  markdown: Optional[GenerationStrategy] = None
21
17
  crop_image: Optional[CroppingStrategy] = None
22
18
 
23
-
24
19
  class SegmentProcessing(BaseModel):
25
20
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
26
21
 
@@ -39,46 +34,38 @@ class SegmentProcessing(BaseModel):
39
34
  page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
40
35
  page: Optional[GenerationConfig] = Field(default=None, alias="Page")
41
36
 
42
-
43
37
  class ChunkProcessing(BaseModel):
44
38
  target_length: Optional[int] = None
45
39
 
46
-
47
40
  class Property(BaseModel):
48
41
  name: str
49
42
  prop_type: str
50
43
  description: Optional[str] = None
51
44
  default: Optional[str] = None
52
45
 
53
-
54
46
  class JsonSchema(BaseModel):
55
47
  title: str
56
48
  properties: List[Property]
57
49
 
58
-
59
50
  class OcrStrategy(str, Enum):
60
51
  ALL = "All"
61
52
  AUTO = "Auto"
62
53
 
63
-
64
54
  class SegmentationStrategy(str, Enum):
65
55
  LAYOUT_ANALYSIS = "LayoutAnalysis"
66
56
  PAGE = "Page"
67
57
 
68
-
69
58
  class BoundingBox(BaseModel):
70
59
  left: float
71
60
  top: float
72
61
  width: float
73
62
  height: float
74
63
 
75
-
76
64
  class OCRResult(BaseModel):
77
65
  bbox: BoundingBox
78
66
  text: str
79
67
  confidence: Optional[float]
80
68
 
81
-
82
69
  class SegmentType(str, Enum):
83
70
  CAPTION = "Caption"
84
71
  FOOTNOTE = "Footnote"
@@ -93,7 +80,6 @@ class SegmentType(str, Enum):
93
80
  TEXT = "Text"
94
81
  TITLE = "Title"
95
82
 
96
-
97
83
  class Segment(BaseModel):
98
84
  bbox: BoundingBox
99
85
  content: str
@@ -107,42 +93,41 @@ class Segment(BaseModel):
107
93
  segment_id: str
108
94
  segment_type: SegmentType
109
95
 
110
-
111
96
  class Chunk(BaseModel):
112
97
  chunk_id: str
113
98
  chunk_length: int
114
99
  segments: List[Segment]
115
100
 
116
-
117
101
  class ExtractedJson(BaseModel):
118
102
  data: Dict
119
103
 
120
-
121
104
  class OutputResponse(BaseModel):
122
105
  chunks: List[Chunk]
123
- extracted_json: Optional[ExtractedJson] = Field(default=None)
124
-
106
+ file_name: Optional[str]
107
+ page_count: Optional[int]
108
+ pdf_url: Optional[str]
125
109
 
126
110
  class Model(str, Enum):
127
111
  FAST = "Fast"
128
112
  HIGH_QUALITY = "HighQuality"
129
113
 
130
- class PipelineType(str, Enum):
114
+ class Pipeline(str, Enum):
131
115
  AZURE = "Azure"
132
116
 
133
117
  class Configuration(BaseModel):
134
- chunk_processing: Optional[ChunkProcessing] = Field(default=None)
135
- expires_in: Optional[int] = Field(default=None)
136
- high_resolution: Optional[bool] = Field(default=None)
137
- json_schema: Optional[Union[JsonSchema, Type[BaseModel], BaseModel]] = Field(
138
- default=None
139
- )
140
- model: Optional[Model] = Field(default=None)
141
- ocr_strategy: Optional[OcrStrategy] = Field(default=None)
142
- segment_processing: Optional[SegmentProcessing] = Field(default=None)
143
- segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
144
- pipeline: Optional[PipelineType] = Field(default=None)
145
-
118
+ chunk_processing: Optional[ChunkProcessing] = None
119
+ expires_in: Optional[int] = None
120
+ high_resolution: Optional[bool] = None
121
+ model: Optional[Model] = None
122
+ ocr_strategy: Optional[OcrStrategy] = None
123
+ segment_processing: Optional[SegmentProcessing] = None
124
+ segmentation_strategy: Optional[SegmentationStrategy] = None
125
+ pipeline: Optional[Pipeline] = None
126
+
127
+ class OutputConfiguration(Configuration):
128
+ input_file_url: Optional[str] = None
129
+ json_schema: Optional[Union[JsonSchema, Type[BaseModel], BaseModel]] = None
130
+
146
131
  @model_validator(mode="before")
147
132
  def map_deprecated_fields(cls, values: Dict) -> Dict:
148
133
  if isinstance(values, dict) and "target_chunk_length" in values:
@@ -151,19 +136,7 @@ class Configuration(BaseModel):
151
136
  values["chunk_processing"] = values.get("chunk_processing", {}) or {}
152
137
  values["chunk_processing"]["target_length"] = target_length
153
138
  return values
154
-
155
- @model_validator(mode="after")
156
- def convert_json_schema(self) -> "Configuration":
157
- if self.json_schema is not None and not isinstance(
158
- self.json_schema, JsonSchema
159
- ):
160
- if isinstance(self.json_schema, (BaseModel, type)) and issubclass(
161
- getattr(self.json_schema, "__class__", type), BaseModel
162
- ):
163
- self.json_schema = JsonSchema(**from_pydantic(self.json_schema))
164
- return self
165
-
166
-
139
+
167
140
  class Status(str, Enum):
168
141
  STARTING = "Starting"
169
142
  PROCESSING = "Processing"
@@ -0,0 +1,58 @@
1
+ import functools
2
+ import asyncio
3
+ import httpx
4
+ from typing import Callable, Any, TypeVar, Awaitable, ParamSpec, Union, overload
5
+
6
+ T = TypeVar('T')
7
+ P = ParamSpec('P')
8
+
9
+ _sync_loop = None
10
+
11
+ @overload
12
+ def anywhere() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Union[Awaitable[T], T]]]: ...
13
+
14
+ def anywhere():
15
+ """Decorator that allows an async function to run anywhere - sync or async context."""
16
+ def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Union[Awaitable[T], T]]:
17
+ @functools.wraps(async_func)
18
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> Union[Awaitable[T], T]:
19
+ global _sync_loop
20
+ try:
21
+ loop = asyncio.get_running_loop()
22
+ return async_func(*args, **kwargs)
23
+ except RuntimeError:
24
+ if _sync_loop is None:
25
+ _sync_loop = asyncio.new_event_loop()
26
+ asyncio.set_event_loop(_sync_loop)
27
+ try:
28
+ return _sync_loop.run_until_complete(async_func(*args, **kwargs))
29
+ finally:
30
+ asyncio.set_event_loop(None)
31
+ return wrapper
32
+ return decorator
33
+
34
+ def ensure_client() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
35
+ """Decorator that ensures a valid httpx.AsyncClient exists before executing the method"""
36
+ def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
37
+ @functools.wraps(async_func)
38
+ async def wrapper(self: Any, *args: P.args, **kwargs: P.kwargs) -> T:
39
+ if not self._client or self._client.is_closed:
40
+ self._client = httpx.AsyncClient()
41
+ return await async_func(self, *args, **kwargs)
42
+ return wrapper
43
+ return decorator
44
+
45
+ def require_task() -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
46
+ """Decorator that ensures task has required attributes and valid client before execution"""
47
+ def decorator(async_func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
48
+ @functools.wraps(async_func)
49
+ async def wrapper(self: Any, *args: P.args, **kwargs: P.kwargs) -> T:
50
+ if not self.task_url:
51
+ raise ValueError("Task URL not found")
52
+ if not self._client:
53
+ raise ValueError("Client not found")
54
+ if not self._client._client or self._client._client.is_closed:
55
+ self._client._client = httpx.AsyncClient()
56
+ return await async_func(self, *args, **kwargs)
57
+ return wrapper
58
+ return decorator
@@ -3,11 +3,9 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
- from pydantic import BaseModel
7
6
  import requests
8
7
  from typing import Union, Tuple, BinaryIO, Optional
9
8
 
10
-
11
9
  def prepare_file(file: Union[str, Path, BinaryIO, Image.Image]) -> Tuple[str, BinaryIO]:
12
10
  """Convert various file types into a tuple of (filename, file-like object)."""
13
11
  # Handle URLs
@@ -1,5 +1,4 @@
1
1
  from typing import Optional, runtime_checkable, Protocol
2
- from requests import Session
3
2
  from httpx import AsyncClient
4
3
 
5
4
 
@@ -9,7 +8,6 @@ class ChunkrClientProtocol(Protocol):
9
8
 
10
9
  url: str
11
10
  _api_key: str
12
- _session: Optional[Session] = None
13
11
  _client: Optional[AsyncClient] = None
14
12
 
15
13
  def get_api_key(self) -> str:
@@ -0,0 +1,119 @@
1
+ from datetime import datetime
2
+ from typing import TypeVar, Optional, Generic
3
+ from pydantic import BaseModel, PrivateAttr
4
+ import asyncio
5
+
6
+ from .config import Configuration, OutputConfiguration, OutputResponse, Status
7
+ from .protocol import ChunkrClientProtocol
8
+ from .misc import prepare_upload_data
9
+ from .decorators import anywhere, require_task
10
+
11
+ T = TypeVar("T", bound="TaskResponse")
12
+
13
+ class TaskResponse(BaseModel, Generic[T]):
14
+ configuration: OutputConfiguration
15
+ created_at: datetime
16
+ expires_at: Optional[datetime]
17
+ finished_at: Optional[datetime]
18
+ message: str
19
+ output: Optional[OutputResponse]
20
+ started_at: Optional[datetime]
21
+ status: Status
22
+ task_id: str
23
+ task_url: Optional[str]
24
+ _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)
25
+
26
+ def with_client(self, client: ChunkrClientProtocol) -> T:
27
+ self._client = client
28
+ return self
29
+
30
+ def _check_status(self) -> Optional[T]:
31
+ """Helper method to check task status and handle completion/failure"""
32
+ if self.status == "Failed":
33
+ raise ValueError(self.message)
34
+ if self.status not in ("Starting", "Processing"):
35
+ return self
36
+ return None
37
+
38
+ async def _poll_request(self) -> dict:
39
+ try:
40
+ if not self._client._client:
41
+ raise ValueError("Client not found")
42
+ r = await self._client._client.get(
43
+ self.task_url, headers=self._client._headers()
44
+ )
45
+ r.raise_for_status()
46
+ return r.json()
47
+ except (ConnectionError, TimeoutError) as _:
48
+ print("Connection error while polling the task, retrying...")
49
+ await asyncio.sleep(0.5)
50
+ except Exception:
51
+ raise
52
+
53
+ @anywhere()
54
+ @require_task()
55
+ async def poll(self) -> T:
56
+ """Poll the task for completion."""
57
+ while True:
58
+ j = await self._poll_request()
59
+ updated = TaskResponse(**j).with_client(self._client)
60
+ self.__dict__.update(updated.__dict__)
61
+ if res := self._check_status():
62
+ return res
63
+ await asyncio.sleep(0.5)
64
+
65
+ @anywhere()
66
+ @require_task()
67
+ async def update(self, config: Configuration) -> T:
68
+ """Update the task configuration, then poll the task until it is no longer starting or processing."""
69
+ f = prepare_upload_data(None, config)
70
+ r = await self._client._client.patch(
71
+ self.task_url, files=f, headers=self._client._headers()
72
+ )
73
+ r.raise_for_status()
74
+ updated = TaskResponse(**r.json()).with_client(self._client)
75
+ self.__dict__.update(updated.__dict__)
76
+ return await self.poll()
77
+
78
+ @anywhere()
79
+ @require_task()
80
+ async def delete(self) -> T:
81
+ """Delete the task."""
82
+ r = await self._client._client.delete(
83
+ self.task_url, headers=self._client._headers()
84
+ )
85
+ r.raise_for_status()
86
+ return self
87
+
88
+ @anywhere()
89
+ @require_task()
90
+ async def cancel(self) -> T:
91
+ """Cancel the task, then poll it until it is no longer starting or processing."""
92
+ r = await self._client._client.get(
93
+ f"{self.task_url}/cancel", headers=self._client._headers()
94
+ )
95
+ r.raise_for_status()
96
+ return await self.poll()
97
+
98
+ def html(self) -> str:
99
+ """Get the full HTML of the task"""
100
+ return self._get_content("html")
101
+
102
+ def markdown(self) -> str:
103
+ """Get the full markdown of the task"""
104
+ return self._get_content("markdown")
105
+
106
+ def content(self) -> str:
107
+ """Get the full content of the task"""
108
+ return self._get_content("content")
109
+
110
+ def _get_content(self, t: str) -> str:
111
+ if not self.output:
112
+ return ""
113
+ parts = []
114
+ for c in self.output.chunks:
115
+ for s in c.segments:
116
+ v = getattr(s, t)
117
+ if v:
118
+ parts.append(v)
119
+ return "\n".join(parts)