chunkr-ai 0.0.5__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {chunkr_ai-0.0.5/src/chunkr_ai.egg-info → chunkr_ai-0.0.7}/PKG-INFO +2 -1
  2. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/pyproject.toml +3 -1
  3. chunkr_ai-0.0.7/src/chunkr_ai/api/base.py +85 -0
  4. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/chunkr.py +63 -4
  5. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/chunkr_async.py +43 -4
  6. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/config.py +19 -19
  7. chunkr_ai-0.0.7/src/chunkr_ai/api/misc.py +106 -0
  8. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/task.py +61 -16
  9. chunkr_ai-0.0.7/src/chunkr_ai/main.py +12 -0
  10. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/models.py +1 -3
  11. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7/src/chunkr_ai.egg-info}/PKG-INFO +2 -1
  12. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/SOURCES.txt +1 -0
  13. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/requires.txt +1 -0
  14. chunkr_ai-0.0.7/tests/test_chunkr.py +329 -0
  15. chunkr_ai-0.0.5/src/chunkr_ai/api/base.py +0 -173
  16. chunkr_ai-0.0.5/src/chunkr_ai/main.py +0 -0
  17. chunkr_ai-0.0.5/tests/test_chunkr.py +0 -158
  18. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/LICENSE +0 -0
  19. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/README.md +0 -0
  20. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/setup.cfg +0 -0
  21. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/__init__.py +0 -0
  22. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/__init__.py +0 -0
  23. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/api.py +0 -0
  24. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/auth.py +0 -0
  25. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/protocol.py +0 -0
  26. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  27. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: pytest-asyncio>=0.25.2
12
13
  Requires-Dist: python-dotenv>=1.0.1
13
14
  Requires-Dist: requests>=2.32.3
14
15
  Provides-Extra: test
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.5"
7
+ version = "0.0.7"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -14,6 +14,7 @@ dependencies = [
14
14
  "httpx>=0.28.1",
15
15
  "pillow>=11.1.0",
16
16
  "pydantic>=2.10.4",
17
+ "pytest-asyncio>=0.25.2",
17
18
  "python-dotenv>=1.0.1",
18
19
  "requests>=2.32.3",
19
20
  ]
@@ -23,3 +24,4 @@ test = [
23
24
  "pytest>=8.3.4",
24
25
  "pytest-xdist>=3.6.1",
25
26
  ]
27
+
@@ -0,0 +1,85 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from pathlib import Path
8
+ from PIL import Image
9
+ from typing import BinaryIO, Union
10
+
11
+ class ChunkrBase(HeadersMixin):
12
+ """Base class with shared functionality for Chunkr API clients."""
13
+
14
+ def __init__(self, url: str = None, api_key: str = None):
15
+ load_dotenv()
16
+ self.url = (
17
+ url or
18
+ os.getenv('CHUNKR_URL') or
19
+ 'https://api.chunkr.ai'
20
+ )
21
+ self._api_key = (
22
+ api_key or
23
+ os.getenv('CHUNKR_API_KEY')
24
+ )
25
+ if not self._api_key:
26
+ raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")
27
+
28
+ self.url = self.url.rstrip("/")
29
+
30
+ @abstractmethod
31
+ def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
32
+ """Upload a file and wait for processing to complete.
33
+
34
+ Must be implemented by subclasses.
35
+ """
36
+ pass
37
+
38
+ @abstractmethod
39
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
40
+ """Update a task by its ID.
41
+
42
+ Must be implemented by subclasses.
43
+ """
44
+ pass
45
+
46
+ @abstractmethod
47
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
48
+ """Upload a file for processing and immediately return the task response.
49
+
50
+ Must be implemented by subclasses.
51
+ """
52
+ pass
53
+
54
+ @abstractmethod
55
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
56
+ """Update a task by its ID.
57
+
58
+ Must be implemented by subclasses.
59
+ """
60
+ pass
61
+
62
+ @abstractmethod
63
+ def get_task(self, task_id: str) -> TaskResponse:
64
+ """Get a task response by its ID.
65
+
66
+ Must be implemented by subclasses.
67
+ """
68
+ pass
69
+
70
+ @abstractmethod
71
+ def delete_task(self, task_id: str) -> None:
72
+ """Delete a task by its ID.
73
+
74
+ Must be implemented by subclasses.
75
+ """
76
+ pass
77
+
78
+ @abstractmethod
79
+ def cancel_task(self, task_id: str) -> None:
80
+ """Cancel a task by its ID.
81
+
82
+ Must be implemented by subclasses.
83
+ """
84
+ pass
85
+
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  from PIL import Image
6
6
  import requests
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class Chunkr(ChunkrBase):
10
11
  """Chunkr API client"""
@@ -43,10 +44,23 @@ class Chunkr(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = self.start_upload(file, config)
47
+ task = self.create_task(file, config)
47
48
  return task.poll()
49
+
50
+ def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = self.update_task(task_id, config)
61
+ return task.poll()
62
+
63
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`
51
65
 
52
66
  Args:
@@ -80,16 +94,35 @@ class Chunkr(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files= prepare_upload_data(file, config)
84
98
  r = self._session.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- data=data,
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
105
+
106
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
+ """Update a task by its ID.
108
+
109
+ Args:
110
+ task_id: The ID of the task to update
111
+ config: The new configuration to use
92
112
 
113
+ Returns:
114
+ TaskResponse: The updated task response
115
+ """
116
+ files = prepare_upload_data(None, config)
117
+ r = self._session.patch(
118
+ f"{self.url}/api/v1/task/{task_id}",
119
+ files=files,
120
+ headers=self._headers()
121
+ )
122
+
123
+ r.raise_for_status()
124
+ return TaskResponse(**r.json()).with_client(self)
125
+
93
126
  def get_task(self, task_id: str) -> TaskResponse:
94
127
  """Get a task response by its ID.
95
128
 
@@ -106,3 +139,29 @@ class Chunkr(ChunkrBase):
106
139
  r.raise_for_status()
107
140
  return TaskResponse(**r.json()).with_client(self)
108
141
 
142
+
143
+ def delete_task(self, task_id: str) -> None:
144
+ """Delete a task by its ID.
145
+
146
+ Args:
147
+ task_id: The ID of the task to delete
148
+ """
149
+ r = self._session.delete(
150
+ f"{self.url}/api/v1/task/{task_id}",
151
+ headers=self._headers()
152
+ )
153
+ r.raise_for_status()
154
+
155
+ def cancel_task(self, task_id: str) -> None:
156
+ """Cancel a task by its ID.
157
+
158
+ Args:
159
+ task_id: The ID of the task to cancel
160
+ """
161
+ r = self._session.get(
162
+ f"{self.url}/api/v1/task/{task_id}/cancel",
163
+ headers=self._headers()
164
+ )
165
+ r.raise_for_status()
166
+
167
+
@@ -5,6 +5,7 @@ import httpx
5
5
  from pathlib import Path
6
6
  from PIL import Image
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class ChunkrAsync(ChunkrBase):
10
11
  """Asynchronous Chunkr API client"""
@@ -43,10 +44,23 @@ class ChunkrAsync(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = await self.start_upload(file, config)
47
+ task = await self.create_task(file, config)
47
48
  return await task.poll_async()
49
+
50
+ async def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- async def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = await self.update_task(task_id, config)
61
+ return await task.poll_async()
62
+
63
+ async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.
51
65
 
52
66
  Args:
@@ -80,16 +94,26 @@ class ChunkrAsync(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files = prepare_upload_data(file, config)
84
98
  r = await self._client.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- json=config.model_dump() if config else {},
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
92
105
 
106
+ async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
+ files = prepare_upload_data(None, config)
108
+ r = await self._client.patch(
109
+ f"{self.url}/api/v1/task/{task_id}",
110
+ files=files,
111
+ headers=self._headers()
112
+ )
113
+
114
+ r.raise_for_status()
115
+ return TaskResponse(**r.json()).with_client(self)
116
+
93
117
  async def get_task(self, task_id: str) -> TaskResponse:
94
118
  r = await self._client.get(
95
119
  f"{self.url}/api/v1/task/{task_id}",
@@ -97,7 +121,22 @@ class ChunkrAsync(ChunkrBase):
97
121
  )
98
122
  r.raise_for_status()
99
123
  return TaskResponse(**r.json()).with_client(self)
124
+
125
+ async def delete_task(self, task_id: str) -> None:
126
+ r = await self._client.delete(
127
+ f"{self.url}/api/v1/task/{task_id}",
128
+ headers=self._headers()
129
+ )
130
+ r.raise_for_status()
131
+
132
+ async def cancel_task(self, task_id: str) -> None:
133
+ r = await self._client.get(
134
+ f"{self.url}/api/v1/task/{task_id}/cancel",
135
+ headers=self._headers()
136
+ )
137
+ r.raise_for_status()
100
138
 
139
+
101
140
  async def __aenter__(self):
102
141
  return self
103
142
 
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field, model_validator
1
+ from pydantic import BaseModel, Field, model_validator, ConfigDict
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict
4
4
 
@@ -10,30 +10,30 @@ class CroppingStrategy(str, Enum):
10
10
  ALL = "All"
11
11
  AUTO = "Auto"
12
12
 
13
- class LlmConfig(BaseModel):
14
- model: str
15
- prompt: str
16
- temperature: float = 0.0
17
-
18
13
  class GenerationConfig(BaseModel):
19
14
  html: Optional[GenerationStrategy] = None
20
- llm: Optional[LlmConfig] = None
15
+ llm: Optional[str] = None
21
16
  markdown: Optional[GenerationStrategy] = None
22
17
  crop_image: Optional[CroppingStrategy] = None
23
18
 
24
19
  class SegmentProcessing(BaseModel):
25
- title: Optional[GenerationConfig] = None
26
- section_header: Optional[GenerationConfig] = None
27
- text: Optional[GenerationConfig] = None
28
- list_item: Optional[GenerationConfig] = None
29
- table: Optional[GenerationConfig] = None
30
- picture: Optional[GenerationConfig] = None
31
- caption: Optional[GenerationConfig] = None
32
- formula: Optional[GenerationConfig] = None
33
- footnote: Optional[GenerationConfig] = None
34
- page_header: Optional[GenerationConfig] = None
35
- page_footer: Optional[GenerationConfig] = None
36
- page: Optional[GenerationConfig] = None
20
+ model_config = ConfigDict(
21
+ populate_by_name=True,
22
+ alias_generator=str.title
23
+ )
24
+
25
+ title: Optional[GenerationConfig] = Field(default=None, alias="Title")
26
+ section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
27
+ text: Optional[GenerationConfig] = Field(default=None, alias="Text")
28
+ list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
29
+ table: Optional[GenerationConfig] = Field(default=None, alias="Table")
30
+ picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
31
+ caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
32
+ formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
33
+ footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
34
+ page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
35
+ page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
36
+ page: Optional[GenerationConfig] = Field(default=None, alias="Page")
37
37
 
38
38
  class ChunkProcessing(BaseModel):
39
39
  target_length: Optional[int] = None
@@ -0,0 +1,106 @@
1
+ import io
2
+ import json
3
+ from pathlib import Path
4
+ from PIL import Image
5
+ import requests
6
+ from typing import Union, Tuple, BinaryIO, Optional
7
+ from .config import Configuration
8
+
9
+
10
+ def prepare_file(
11
+ file: Union[str, Path, BinaryIO, Image.Image]
12
+ ) -> Tuple[str, BinaryIO]:
13
+ """Convert various file types into a tuple of (filename, file-like object)."""
14
+ # Handle URLs
15
+ if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
16
+ response = requests.get(file)
17
+ response.raise_for_status()
18
+ file_obj = io.BytesIO(response.content)
19
+ filename = Path(file.split('/')[-1]).name or 'downloaded_file'
20
+ return filename, file_obj
21
+
22
+ # Handle base64 strings
23
+ if isinstance(file, str) and ',' in file and ';base64,' in file:
24
+ try:
25
+ # Split header and data
26
+ header, base64_data = file.split(',', 1)
27
+ import base64
28
+ file_bytes = base64.b64decode(base64_data)
29
+ file_obj = io.BytesIO(file_bytes)
30
+
31
+ # Try to determine format from header
32
+ format = 'bin'
33
+ mime_type = header.split(':')[-1].split(';')[0].lower()
34
+
35
+ # Map MIME types to file extensions
36
+ mime_to_ext = {
37
+ 'application/pdf': 'pdf',
38
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
39
+ 'application/msword': 'doc',
40
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
41
+ 'application/vnd.ms-powerpoint': 'ppt',
42
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
43
+ 'application/vnd.ms-excel': 'xls',
44
+ 'image/jpeg': 'jpg',
45
+ 'image/png': 'png',
46
+ 'image/jpg': 'jpg'
47
+ }
48
+
49
+ if mime_type in mime_to_ext:
50
+ format = mime_to_ext[mime_type]
51
+ else:
52
+ raise ValueError(f"Unsupported MIME type: {mime_type}")
53
+
54
+ return f"file.{format}", file_obj
55
+ except Exception as e:
56
+ raise ValueError(f"Invalid base64 string: {str(e)}")
57
+
58
+ # Handle file paths
59
+ if isinstance(file, (str, Path)):
60
+ path = Path(file).resolve()
61
+ if not path.exists():
62
+ raise FileNotFoundError(f"File not found: {file}")
63
+ return path.name, open(path, 'rb')
64
+
65
+ # Handle PIL Images
66
+ if isinstance(file, Image.Image):
67
+ img_byte_arr = io.BytesIO()
68
+ format = file.format or 'PNG'
69
+ file.save(img_byte_arr, format=format)
70
+ img_byte_arr.seek(0)
71
+ return f"image.{format.lower()}", img_byte_arr
72
+
73
+ # Handle file-like objects
74
+ if hasattr(file, 'read') and hasattr(file, 'seek'):
75
+ # Try to get the filename from the file object if possible
76
+ name = getattr(file, 'name', 'document') if hasattr(file, 'name') else 'document'
77
+ return Path(name).name, file
78
+
79
+ raise TypeError(f"Unsupported file type: {type(file)}")
80
+
81
+
82
+
83
+ def prepare_upload_data(
84
+ file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
85
+ config: Optional[Configuration] = None
86
+ ) -> dict:
87
+ """Prepare files and data dictionaries for upload.
88
+
89
+ Args:
90
+ file: The file to upload
91
+ config: Optional configuration settings
92
+
93
+ Returns:
94
+ dict: (files dict) ready for upload
95
+ """
96
+ files = {}
97
+ if file:
98
+ filename, file_obj = prepare_file(file)
99
+ files = {"file": (filename, file_obj)}
100
+
101
+ if config:
102
+ config_dict = config.model_dump(mode="json", exclude_none=True)
103
+ for key, value in config_dict.items():
104
+ files[key] = (None, json.dumps(value), 'application/json')
105
+
106
+ return files
@@ -1,5 +1,6 @@
1
1
  from .protocol import ChunkrClientProtocol
2
2
  from .config import Configuration, OutputResponse
3
+ from .misc import prepare_upload_data
3
4
  import asyncio
4
5
  from datetime import datetime
5
6
  from enum import Enum
@@ -12,6 +13,7 @@ class Status(str, Enum):
12
13
  PROCESSING = "Processing"
13
14
  SUCCEEDED = "Succeeded"
14
15
  FAILED = "Failed"
16
+ CANCELLED = "Cancelled"
15
17
 
16
18
  class TaskResponse(BaseModel):
17
19
  configuration: Configuration
@@ -24,6 +26,7 @@ class TaskResponse(BaseModel):
24
26
  output: Optional[OutputResponse]
25
27
  page_count: Optional[int]
26
28
  pdf_url: Optional[str]
29
+ started_at: Optional[datetime]
27
30
  status: Status
28
31
  task_id: str
29
32
  task_url: Optional[str]
@@ -57,8 +60,9 @@ class TaskResponse(BaseModel):
57
60
  while True:
58
61
  try:
59
62
  r = await self._client._client.get(self.task_url, headers=self._client._headers())
60
- await r.raise_for_status()
61
- return await r.json()
63
+ r.raise_for_status()
64
+ response = r.json()
65
+ return response
62
66
  except (ConnectionError, TimeoutError) as _:
63
67
  print("Connection error while polling the task, retrying...")
64
68
  await asyncio.sleep(0.5)
@@ -77,7 +81,8 @@ class TaskResponse(BaseModel):
77
81
  """Poll the task for completion."""
78
82
  while True:
79
83
  response = self._poll_request_sync()
80
- self.__dict__.update(response)
84
+ updated_task = TaskResponse(**response).with_client(self._client)
85
+ self.__dict__.update(updated_task.__dict__)
81
86
 
82
87
  if result := self._check_status():
83
88
  return result
@@ -88,7 +93,8 @@ class TaskResponse(BaseModel):
88
93
  """Poll the task for completion asynchronously."""
89
94
  while True:
90
95
  response = await self._poll_request_async()
91
- self.__dict__.update(response)
96
+ updated_task = TaskResponse(**response).with_client(self._client)
97
+ self.__dict__.update(updated_task.__dict__)
92
98
 
93
99
  if result := self._check_status():
94
100
  return result
@@ -106,6 +112,56 @@ class TaskResponse(BaseModel):
106
112
  if content:
107
113
  parts.append(content)
108
114
  return "\n".join(parts)
115
+
116
+ def update(self, config: Configuration) -> 'TaskResponse':
117
+ files = prepare_upload_data(None, config)
118
+ r = self._client._session.patch(
119
+ f"{self.task_url}",
120
+ files=files,
121
+ headers=self._client._headers()
122
+ )
123
+ r.raise_for_status()
124
+ return TaskResponse(**r.json()).with_client(self._client)
125
+
126
+ async def update_async(self, config: Configuration) -> 'TaskResponse':
127
+ files = prepare_upload_data(None, config)
128
+ r = await self._client._client.patch(
129
+ f"{self.task_url}",
130
+ files=files,
131
+ headers=self._client._headers()
132
+ )
133
+ r.raise_for_status()
134
+ return TaskResponse(**r.json()).with_client(self._client)
135
+
136
+ def cancel(self):
137
+ r = self._client._session.get(
138
+ f"{self.task_url}/cancel",
139
+ headers=self._client._headers()
140
+ )
141
+ r.raise_for_status()
142
+ self.poll()
143
+
144
+ async def cancel_async(self):
145
+ r = await self._client._client.get(
146
+ f"{self.task_url}/cancel",
147
+ headers=self._client._headers()
148
+ )
149
+ r.raise_for_status()
150
+ await self.poll_async()
151
+
152
+ def delete(self):
153
+ r = self._client._session.delete(
154
+ f"{self.task_url}",
155
+ headers=self._client._headers()
156
+ )
157
+ r.raise_for_status()
158
+
159
+ async def delete_async(self):
160
+ r = await self._client._client.delete(
161
+ f"{self.task_url}",
162
+ headers=self._client._headers()
163
+ )
164
+ r.raise_for_status()
109
165
 
110
166
  def html(self) -> str:
111
167
  """Get full HTML for the task"""
@@ -117,15 +173,4 @@ class TaskResponse(BaseModel):
117
173
 
118
174
  def content(self) -> str:
119
175
  """Get full text for the task"""
120
- return self._get_content("content")
121
-
122
- class TaskPayload(BaseModel):
123
- current_configuration: Configuration
124
- file_name: str
125
- image_folder_location: str
126
- input_location: str
127
- output_location: str
128
- pdf_location: str
129
- previous_configuration: Optional[Configuration]
130
- task_id: str
131
- user_id: str
176
+ return self._get_content("content")
@@ -0,0 +1,12 @@
1
+ from chunkr_ai.api.chunkr import Chunkr
2
+ from chunkr_ai.models import Configuration
3
+ from chunkr_ai.api.config import SegmentationStrategy, ChunkProcessing
4
+
5
+ if __name__ == "__main__":
6
+ chunkr = Chunkr()
7
+ task = chunkr.update_task("556b4fe5-e3f7-48dc-9f56-0fb7fbacdb87", Configuration(
8
+ chunk_processing=ChunkProcessing(
9
+ target_length=1000
10
+ )
11
+ ))
12
+ print(task)
@@ -8,7 +8,6 @@ from .api.config import (
8
8
  GenerationStrategy,
9
9
  GenerationConfig,
10
10
  JsonSchema,
11
- LlmConfig,
12
11
  Model,
13
12
  OCRResult,
14
13
  OcrStrategy,
@@ -20,7 +19,7 @@ from .api.config import (
20
19
  SegmentationStrategy,
21
20
  )
22
21
 
23
- from .api.task import TaskResponse, TaskPayload, Status
22
+ from .api.task import TaskResponse, Status
24
23
 
25
24
  __all__ = [
26
25
  'BoundingBox',
@@ -43,6 +42,5 @@ __all__ = [
43
42
  'SegmentType',
44
43
  'SegmentationStrategy',
45
44
  'Status',
46
- 'TaskPayload',
47
45
  'TaskResponse'
48
46
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: pytest-asyncio>=0.25.2
12
13
  Requires-Dist: python-dotenv>=1.0.1
13
14
  Requires-Dist: requests>=2.32.3
14
15
  Provides-Extra: test
@@ -16,6 +16,7 @@ src/chunkr_ai/api/base.py
16
16
  src/chunkr_ai/api/chunkr.py
17
17
  src/chunkr_ai/api/chunkr_async.py
18
18
  src/chunkr_ai/api/config.py
19
+ src/chunkr_ai/api/misc.py
19
20
  src/chunkr_ai/api/protocol.py
20
21
  src/chunkr_ai/api/task.py
21
22
  tests/test_chunkr.py
@@ -1,6 +1,7 @@
1
1
  httpx>=0.28.1
2
2
  pillow>=11.1.0
3
3
  pydantic>=2.10.4
4
+ pytest-asyncio>=0.25.2
4
5
  python-dotenv>=1.0.1
5
6
  requests>=2.32.3
6
7