chunkr-ai 0.0.5__tar.gz → 0.0.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27) hide show
  1. {chunkr_ai-0.0.5/src/chunkr_ai.egg-info → chunkr_ai-0.0.7}/PKG-INFO +2 -1
  2. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/pyproject.toml +3 -1
  3. chunkr_ai-0.0.7/src/chunkr_ai/api/base.py +85 -0
  4. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/chunkr.py +63 -4
  5. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/chunkr_async.py +43 -4
  6. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/config.py +19 -19
  7. chunkr_ai-0.0.7/src/chunkr_ai/api/misc.py +106 -0
  8. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/task.py +61 -16
  9. chunkr_ai-0.0.7/src/chunkr_ai/main.py +12 -0
  10. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/models.py +1 -3
  11. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7/src/chunkr_ai.egg-info}/PKG-INFO +2 -1
  12. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/SOURCES.txt +1 -0
  13. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/requires.txt +1 -0
  14. chunkr_ai-0.0.7/tests/test_chunkr.py +329 -0
  15. chunkr_ai-0.0.5/src/chunkr_ai/api/base.py +0 -173
  16. chunkr_ai-0.0.5/src/chunkr_ai/main.py +0 -0
  17. chunkr_ai-0.0.5/tests/test_chunkr.py +0 -158
  18. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/LICENSE +0 -0
  19. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/README.md +0 -0
  20. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/setup.cfg +0 -0
  21. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/__init__.py +0 -0
  22. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/__init__.py +0 -0
  23. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/api.py +0 -0
  24. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/auth.py +0 -0
  25. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai/api/protocol.py +0 -0
  26. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  27. {chunkr_ai-0.0.5 → chunkr_ai-0.0.7}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: pytest-asyncio>=0.25.2
12
13
  Requires-Dist: python-dotenv>=1.0.1
13
14
  Requires-Dist: requests>=2.32.3
14
15
  Provides-Extra: test
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.5"
7
+ version = "0.0.7"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -14,6 +14,7 @@ dependencies = [
14
14
  "httpx>=0.28.1",
15
15
  "pillow>=11.1.0",
16
16
  "pydantic>=2.10.4",
17
+ "pytest-asyncio>=0.25.2",
17
18
  "python-dotenv>=1.0.1",
18
19
  "requests>=2.32.3",
19
20
  ]
@@ -23,3 +24,4 @@ test = [
23
24
  "pytest>=8.3.4",
24
25
  "pytest-xdist>=3.6.1",
25
26
  ]
27
+
@@ -0,0 +1,85 @@
1
+ from .config import Configuration
2
+ from .task import TaskResponse
3
+ from .auth import HeadersMixin
4
+ from abc import abstractmethod
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from pathlib import Path
8
+ from PIL import Image
9
+ from typing import BinaryIO, Union
10
+
11
class ChunkrBase(HeadersMixin):
    """Base class with shared functionality for Chunkr API clients.

    Resolves the API base URL and API key at construction time and declares
    the task operations that concrete clients implement.

    NOTE(review): ``@abstractmethod`` is only enforced at instantiation when
    the class has an ``ABCMeta`` metaclass. ``HeadersMixin`` is defined in
    another module, so confirm whether that is the case here.
    """

    def __init__(self, url: str = None, api_key: str = None):
        """Resolve the API base URL and API key.

        Args:
            url: Base URL of the API. Falls back to the ``CHUNKR_URL``
                environment variable, then to ``https://api.chunkr.ai``.
            api_key: API key. Falls back to the ``CHUNKR_API_KEY``
                environment variable.

        Raises:
            ValueError: If no API key could be resolved.
        """
        # Load a .env file (if any) before the environment is consulted.
        load_dotenv()
        self.url = (
            url or
            os.getenv('CHUNKR_URL') or
            'https://api.chunkr.ai'
        )
        self._api_key = (
            api_key or
            os.getenv('CHUNKR_API_KEY')
        )
        if not self._api_key:
            raise ValueError("API key must be provided either directly, in .env file, or as CHUNKR_API_KEY environment variable. You can get an api key at: https://www.chunkr.ai")

        # Endpoints are built as f"{self.url}/api/...", so drop trailing slashes.
        self.url = self.url.rstrip("/")

    @abstractmethod
    def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file and wait for processing to complete.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
        """Upload a file for processing and immediately return the task response.

        Must be implemented by subclasses.
        """
        pass

    # Declared exactly once: the original listed this abstract method twice,
    # and the second definition silently replaced the first.
    @abstractmethod
    def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
        """Update a task by its ID.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def get_task(self, task_id: str) -> TaskResponse:
        """Get a task response by its ID.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def delete_task(self, task_id: str) -> None:
        """Delete a task by its ID.

        Must be implemented by subclasses.
        """
        pass

    @abstractmethod
    def cancel_task(self, task_id: str) -> None:
        """Cancel a task by its ID.

        Must be implemented by subclasses.
        """
        pass
85
+
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  from PIL import Image
6
6
  import requests
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class Chunkr(ChunkrBase):
10
11
  """Chunkr API client"""
@@ -43,10 +44,23 @@ class Chunkr(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = self.start_upload(file, config)
47
+ task = self.create_task(file, config)
47
48
  return task.poll()
49
+
50
+ def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = self.update_task(task_id, config)
61
+ return task.poll()
62
+
63
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`
51
65
 
52
66
  Args:
@@ -80,16 +94,35 @@ class Chunkr(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files= prepare_upload_data(file, config)
84
98
  r = self._session.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- data=data,
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
105
+
106
def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
    """Send a new configuration for an existing task.

    Issues a PATCH to the task endpoint and returns immediately with the
    response the API sends back; it does not poll for completion.

    Args:
        task_id: The ID of the task to update
        config: The new configuration to use

    Returns:
        TaskResponse: The updated task response, bound to this client
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}"
    # No file is attached: only the configuration fields are sent.
    form_parts = prepare_upload_data(None, config)
    response = self._session.patch(
        endpoint,
        files=form_parts,
        headers=self._headers(),
    )
    response.raise_for_status()

    payload = response.json()
    return TaskResponse(**payload).with_client(self)
125
+
93
126
  def get_task(self, task_id: str) -> TaskResponse:
94
127
  """Get a task response by its ID.
95
128
 
@@ -106,3 +139,29 @@ class Chunkr(ChunkrBase):
106
139
  r.raise_for_status()
107
140
  return TaskResponse(**r.json()).with_client(self)
108
141
 
142
+
143
def delete_task(self, task_id: str) -> None:
    """Delete a task by its ID.

    Sends a DELETE to the task endpoint and raises if the response status
    indicates an error. Nothing is returned.

    Args:
        task_id: The ID of the task to delete
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}"
    response = self._session.delete(endpoint, headers=self._headers())
    response.raise_for_status()
154
+
155
def cancel_task(self, task_id: str) -> None:
    """Cancel a task by its ID.

    Sends a GET to the task's ``/cancel`` endpoint and raises if the
    response status indicates an error. Nothing is returned.

    Args:
        task_id: The ID of the task to cancel
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}/cancel"
    response = self._session.get(endpoint, headers=self._headers())
    response.raise_for_status()
166
+
167
+
@@ -5,6 +5,7 @@ import httpx
5
5
  from pathlib import Path
6
6
  from PIL import Image
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class ChunkrAsync(ChunkrBase):
10
11
  """Asynchronous Chunkr API client"""
@@ -43,10 +44,23 @@ class ChunkrAsync(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = await self.start_upload(file, config)
47
+ task = await self.create_task(file, config)
47
48
  return await task.poll_async()
49
+
50
+ async def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- async def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = await self.update_task(task_id, config)
61
+ return await task.poll_async()
62
+
63
+ async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.
51
65
 
52
66
  Args:
@@ -80,16 +94,26 @@ class ChunkrAsync(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files = prepare_upload_data(file, config)
84
98
  r = await self._client.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- json=config.model_dump() if config else {},
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
92
105
 
106
async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
    """Send a new configuration for an existing task.

    Issues a PATCH to the task endpoint and returns the response the API
    sends back; it does not poll for completion.

    Args:
        task_id: The ID of the task to update
        config: The new configuration to use

    Returns:
        TaskResponse: The updated task response, bound to this client
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}"
    # No file is attached: only the configuration fields are sent.
    form_parts = prepare_upload_data(None, config)
    response = await self._client.patch(
        endpoint,
        files=form_parts,
        headers=self._headers(),
    )
    response.raise_for_status()

    payload = response.json()
    return TaskResponse(**payload).with_client(self)
116
+
93
117
  async def get_task(self, task_id: str) -> TaskResponse:
94
118
  r = await self._client.get(
95
119
  f"{self.url}/api/v1/task/{task_id}",
@@ -97,7 +121,22 @@ class ChunkrAsync(ChunkrBase):
97
121
  )
98
122
  r.raise_for_status()
99
123
  return TaskResponse(**r.json()).with_client(self)
124
+
125
async def delete_task(self, task_id: str) -> None:
    """Delete a task by its ID.

    Sends a DELETE to the task endpoint and raises if the response status
    indicates an error. Nothing is returned.

    Args:
        task_id: The ID of the task to delete
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}"
    response = await self._client.delete(endpoint, headers=self._headers())
    response.raise_for_status()
131
+
132
async def cancel_task(self, task_id: str) -> None:
    """Cancel a task by its ID.

    Sends a GET to the task's ``/cancel`` endpoint and raises if the
    response status indicates an error. Nothing is returned.

    Args:
        task_id: The ID of the task to cancel
    """
    endpoint = f"{self.url}/api/v1/task/{task_id}/cancel"
    response = await self._client.get(endpoint, headers=self._headers())
    response.raise_for_status()
100
138
 
139
+
101
140
  async def __aenter__(self):
102
141
  return self
103
142
 
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field, model_validator
1
+ from pydantic import BaseModel, Field, model_validator, ConfigDict
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict
4
4
 
@@ -10,30 +10,30 @@ class CroppingStrategy(str, Enum):
10
10
  ALL = "All"
11
11
  AUTO = "Auto"
12
12
 
13
- class LlmConfig(BaseModel):
14
- model: str
15
- prompt: str
16
- temperature: float = 0.0
17
-
18
13
  class GenerationConfig(BaseModel):
19
14
  html: Optional[GenerationStrategy] = None
20
- llm: Optional[LlmConfig] = None
15
+ llm: Optional[str] = None
21
16
  markdown: Optional[GenerationStrategy] = None
22
17
  crop_image: Optional[CroppingStrategy] = None
23
18
 
24
19
  class SegmentProcessing(BaseModel):
25
- title: Optional[GenerationConfig] = None
26
- section_header: Optional[GenerationConfig] = None
27
- text: Optional[GenerationConfig] = None
28
- list_item: Optional[GenerationConfig] = None
29
- table: Optional[GenerationConfig] = None
30
- picture: Optional[GenerationConfig] = None
31
- caption: Optional[GenerationConfig] = None
32
- formula: Optional[GenerationConfig] = None
33
- footnote: Optional[GenerationConfig] = None
34
- page_header: Optional[GenerationConfig] = None
35
- page_footer: Optional[GenerationConfig] = None
36
- page: Optional[GenerationConfig] = None
20
+ model_config = ConfigDict(
21
+ populate_by_name=True,
22
+ alias_generator=str.title
23
+ )
24
+
25
+ title: Optional[GenerationConfig] = Field(default=None, alias="Title")
26
+ section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
27
+ text: Optional[GenerationConfig] = Field(default=None, alias="Text")
28
+ list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
29
+ table: Optional[GenerationConfig] = Field(default=None, alias="Table")
30
+ picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
31
+ caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
32
+ formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
33
+ footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
34
+ page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
35
+ page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
36
+ page: Optional[GenerationConfig] = Field(default=None, alias="Page")
37
37
 
38
38
  class ChunkProcessing(BaseModel):
39
39
  target_length: Optional[int] = None
@@ -0,0 +1,106 @@
1
+ import io
2
+ import json
3
+ from pathlib import Path
4
+ from PIL import Image
5
+ import requests
6
+ from typing import Union, Tuple, BinaryIO, Optional
7
+ from .config import Configuration
8
+
9
+
10
def prepare_file(
    file: Union[str, Path, BinaryIO, Image.Image]
) -> Tuple[str, BinaryIO]:
    """Convert various file types into a tuple of (filename, file-like object).

    Inputs are tried in this order: an ``http://``/``https://`` URL string
    (downloaded with ``requests``), a base64 data URI string (any string
    containing ``;base64,``), a filesystem path, a PIL image, and finally
    any object exposing both ``read`` and ``seek``.

    Args:
        file: URL, base64 data URI, path, PIL image or binary file object.

    Returns:
        The filename to report for the upload and a binary stream holding
        the content. For filesystem paths the stream is a newly opened file
        handle; this function does not close it, so the caller must.

    Raises:
        ValueError: The data URI declares an unsupported MIME type, or its
            payload cannot be base64-decoded.
        FileNotFoundError: A path was given that does not exist.
        TypeError: ``file`` is none of the supported kinds.
    """
    # Function-scope imports, in line with the original's local
    # ``import base64``.
    import base64
    import binascii
    from urllib.parse import urlparse

    if isinstance(file, str):
        # Handle URLs
        if file.startswith(('http://', 'https://')):
            # NOTE(review): no timeout is passed to requests.get, so a
            # stalled server blocks indefinitely; consider adding one.
            response = requests.get(file)
            response.raise_for_status()
            # Use only the URL path for the name so a query string or
            # fragment does not end up in the uploaded filename.
            filename = Path(urlparse(file).path).name or 'downloaded_file'
            return filename, io.BytesIO(response.content)

        # Handle base64 strings
        if ';base64,' in file:
            # Split header ("data:<mime>;base64") and data
            header, base64_data = file.split(',', 1)
            mime_type = header.split(':')[-1].split(';')[0].lower()

            # Map MIME types to file extensions
            mime_to_ext = {
                'application/pdf': 'pdf',
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
                'application/msword': 'doc',
                'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
                'application/vnd.ms-powerpoint': 'ppt',
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
                'application/vnd.ms-excel': 'xls',
                'image/jpeg': 'jpg',
                'image/png': 'png',
                'image/jpg': 'jpg',
            }

            extension = mime_to_ext.get(mime_type)
            if extension is None:
                # Raised outside any try block so it is no longer re-labelled
                # as an "Invalid base64 string" error.
                raise ValueError(f"Unsupported MIME type: {mime_type}")

            try:
                file_bytes = base64.b64decode(base64_data)
            except (binascii.Error, ValueError) as e:
                raise ValueError(f"Invalid base64 string: {str(e)}") from e

            return f"file.{extension}", io.BytesIO(file_bytes)

    # Handle file paths
    if isinstance(file, (str, Path)):
        path = Path(file).resolve()
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file}")
        # The handle is handed to the caller, who owns closing it.
        return path.name, open(path, 'rb')

    # Handle PIL Images
    if isinstance(file, Image.Image):
        buffer = io.BytesIO()
        image_format = file.format or 'PNG'
        file.save(buffer, format=image_format)
        buffer.seek(0)
        return f"image.{image_format.lower()}", buffer

    # Handle file-like objects
    if hasattr(file, 'read') and hasattr(file, 'seek'):
        # ``name`` may be missing, empty, or not path-like (for example an
        # integer file descriptor); fall back to a generic name then.
        raw_name = getattr(file, 'name', None)
        filename = Path(raw_name).name if isinstance(raw_name, (str, Path)) else ''
        return filename or 'document', file

    raise TypeError(f"Unsupported file type: {type(file)}")
80
+
81
+
82
+
83
def prepare_upload_data(
    file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
    config: Optional[Configuration] = None
) -> dict:
    """Build the multipart form parts for a task request.

    Args:
        file: The file to upload. When falsy, no ``"file"`` part is added.
        config: Optional configuration settings. Each field that is not
            ``None`` becomes its own JSON-encoded part.

    Returns:
        dict: Mapping of part name to part tuple. The file part is
        ``(filename, file_obj)``; each configuration part is
        ``(None, <json string>, 'application/json')``.
    """
    form_parts = {}

    if file:
        upload_name, stream = prepare_file(file)
        form_parts["file"] = (upload_name, stream)

    if config:
        serialized = config.model_dump(mode="json", exclude_none=True)
        form_parts.update(
            (field, (None, json.dumps(payload), 'application/json'))
            for field, payload in serialized.items()
        )

    return form_parts
@@ -1,5 +1,6 @@
1
1
  from .protocol import ChunkrClientProtocol
2
2
  from .config import Configuration, OutputResponse
3
+ from .misc import prepare_upload_data
3
4
  import asyncio
4
5
  from datetime import datetime
5
6
  from enum import Enum
@@ -12,6 +13,7 @@ class Status(str, Enum):
12
13
  PROCESSING = "Processing"
13
14
  SUCCEEDED = "Succeeded"
14
15
  FAILED = "Failed"
16
+ CANCELLED = "Cancelled"
15
17
 
16
18
  class TaskResponse(BaseModel):
17
19
  configuration: Configuration
@@ -24,6 +26,7 @@ class TaskResponse(BaseModel):
24
26
  output: Optional[OutputResponse]
25
27
  page_count: Optional[int]
26
28
  pdf_url: Optional[str]
29
+ started_at: Optional[datetime]
27
30
  status: Status
28
31
  task_id: str
29
32
  task_url: Optional[str]
@@ -57,8 +60,9 @@ class TaskResponse(BaseModel):
57
60
  while True:
58
61
  try:
59
62
  r = await self._client._client.get(self.task_url, headers=self._client._headers())
60
- await r.raise_for_status()
61
- return await r.json()
63
+ r.raise_for_status()
64
+ response = r.json()
65
+ return response
62
66
  except (ConnectionError, TimeoutError) as _:
63
67
  print("Connection error while polling the task, retrying...")
64
68
  await asyncio.sleep(0.5)
@@ -77,7 +81,8 @@ class TaskResponse(BaseModel):
77
81
  """Poll the task for completion."""
78
82
  while True:
79
83
  response = self._poll_request_sync()
80
- self.__dict__.update(response)
84
+ updated_task = TaskResponse(**response).with_client(self._client)
85
+ self.__dict__.update(updated_task.__dict__)
81
86
 
82
87
  if result := self._check_status():
83
88
  return result
@@ -88,7 +93,8 @@ class TaskResponse(BaseModel):
88
93
  """Poll the task for completion asynchronously."""
89
94
  while True:
90
95
  response = await self._poll_request_async()
91
- self.__dict__.update(response)
96
+ updated_task = TaskResponse(**response).with_client(self._client)
97
+ self.__dict__.update(updated_task.__dict__)
92
98
 
93
99
  if result := self._check_status():
94
100
  return result
@@ -106,6 +112,56 @@ class TaskResponse(BaseModel):
106
112
  if content:
107
113
  parts.append(content)
108
114
  return "\n".join(parts)
115
+
116
def update(self, config: Configuration) -> 'TaskResponse':
    """Send a new configuration for this task.

    Issues a PATCH to ``task_url`` and builds a new ``TaskResponse`` from
    the reply; this instance itself is not modified.

    Args:
        config: The new configuration to use

    Returns:
        TaskResponse: The response returned by the API, bound to the same client
    """
    client = self._client
    # No file is attached: only the configuration fields are sent.
    form_parts = prepare_upload_data(None, config)
    response = client._session.patch(
        f"{self.task_url}",
        files=form_parts,
        headers=client._headers(),
    )
    response.raise_for_status()
    payload = response.json()
    return TaskResponse(**payload).with_client(self._client)
125
+
126
async def update_async(self, config: Configuration) -> 'TaskResponse':
    """Send a new configuration for this task using the async client.

    Issues a PATCH to ``task_url`` and builds a new ``TaskResponse`` from
    the reply; this instance itself is not modified.

    Args:
        config: The new configuration to use

    Returns:
        TaskResponse: The response returned by the API, bound to the same client
    """
    client = self._client
    # No file is attached: only the configuration fields are sent.
    form_parts = prepare_upload_data(None, config)
    response = await client._client.patch(
        f"{self.task_url}",
        files=form_parts,
        headers=client._headers(),
    )
    response.raise_for_status()
    payload = response.json()
    return TaskResponse(**payload).with_client(self._client)
135
+
136
def cancel(self):
    """Request cancellation of this task, then poll it.

    Sends a GET to ``<task_url>/cancel``, raises if the response status
    indicates an error, and then calls ``poll()``. The value returned by
    ``poll()`` is discarded, so this method returns ``None``.
    """
    client = self._client
    cancel_url = f"{self.task_url}/cancel"
    response = client._session.get(cancel_url, headers=client._headers())
    response.raise_for_status()
    self.poll()
143
+
144
async def cancel_async(self):
    """Request cancellation of this task with the async client, then poll it.

    Sends a GET to ``<task_url>/cancel``, raises if the response status
    indicates an error, and then awaits ``poll_async()``. The value returned
    by ``poll_async()`` is discarded, so this method returns ``None``.
    """
    client = self._client
    cancel_url = f"{self.task_url}/cancel"
    response = await client._client.get(cancel_url, headers=client._headers())
    response.raise_for_status()
    await self.poll_async()
151
+
152
def delete(self):
    """Delete this task.

    Sends a DELETE to ``task_url`` and raises if the response status
    indicates an error. Nothing is returned.
    """
    client = self._client
    response = client._session.delete(
        f"{self.task_url}",
        headers=client._headers(),
    )
    response.raise_for_status()
158
+
159
async def delete_async(self):
    """Delete this task using the async client.

    Sends a DELETE to ``task_url`` and raises if the response status
    indicates an error. Nothing is returned.
    """
    client = self._client
    response = await client._client.delete(
        f"{self.task_url}",
        headers=client._headers(),
    )
    response.raise_for_status()
109
165
 
110
166
  def html(self) -> str:
111
167
  """Get full HTML for the task"""
@@ -117,15 +173,4 @@ class TaskResponse(BaseModel):
117
173
 
118
174
  def content(self) -> str:
119
175
  """Get full text for the task"""
120
- return self._get_content("content")
121
-
122
- class TaskPayload(BaseModel):
123
- current_configuration: Configuration
124
- file_name: str
125
- image_folder_location: str
126
- input_location: str
127
- output_location: str
128
- pdf_location: str
129
- previous_configuration: Optional[Configuration]
130
- task_id: str
131
- user_id: str
176
+ return self._get_content("content")
@@ -0,0 +1,12 @@
1
+ from chunkr_ai.api.chunkr import Chunkr
2
+ from chunkr_ai.models import Configuration
3
+ from chunkr_ai.api.config import SegmentationStrategy, ChunkProcessing
4
+
5
if __name__ == "__main__":
    # Manual check: send a configuration with a chunk target length of 1000
    # to one hard-coded task ID and print the response.
    # Chunkr() is constructed without arguments.
    client = Chunkr()
    new_config = Configuration(
        chunk_processing=ChunkProcessing(target_length=1000),
    )
    updated = client.update_task("556b4fe5-e3f7-48dc-9f56-0fb7fbacdb87", new_config)
    print(updated)
@@ -8,7 +8,6 @@ from .api.config import (
8
8
  GenerationStrategy,
9
9
  GenerationConfig,
10
10
  JsonSchema,
11
- LlmConfig,
12
11
  Model,
13
12
  OCRResult,
14
13
  OcrStrategy,
@@ -20,7 +19,7 @@ from .api.config import (
20
19
  SegmentationStrategy,
21
20
  )
22
21
 
23
- from .api.task import TaskResponse, TaskPayload, Status
22
+ from .api.task import TaskResponse, Status
24
23
 
25
24
  __all__ = [
26
25
  'BoundingBox',
@@ -43,6 +42,5 @@ __all__ = [
43
42
  'SegmentType',
44
43
  'SegmentationStrategy',
45
44
  'Status',
46
- 'TaskPayload',
47
45
  'TaskResponse'
48
46
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: pydantic>=2.10.4
12
+ Requires-Dist: pytest-asyncio>=0.25.2
12
13
  Requires-Dist: python-dotenv>=1.0.1
13
14
  Requires-Dist: requests>=2.32.3
14
15
  Provides-Extra: test
@@ -16,6 +16,7 @@ src/chunkr_ai/api/base.py
16
16
  src/chunkr_ai/api/chunkr.py
17
17
  src/chunkr_ai/api/chunkr_async.py
18
18
  src/chunkr_ai/api/config.py
19
+ src/chunkr_ai/api/misc.py
19
20
  src/chunkr_ai/api/protocol.py
20
21
  src/chunkr_ai/api/task.py
21
22
  tests/test_chunkr.py
@@ -1,6 +1,7 @@
1
1
  httpx>=0.28.1
2
2
  pillow>=11.1.0
3
3
  pydantic>=2.10.4
4
+ pytest-asyncio>=0.25.2
4
5
  python-dotenv>=1.0.1
5
6
  requests>=2.32.3
6
7