chunkr-ai 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/base.py CHANGED
@@ -3,13 +3,10 @@ from .task import TaskResponse
3
3
  from .auth import HeadersMixin
4
4
  from abc import abstractmethod
5
5
  from dotenv import load_dotenv
6
- import io
7
- import json
8
6
  import os
9
7
  from pathlib import Path
10
8
  from PIL import Image
11
- import requests
12
- from typing import BinaryIO, Tuple, Union
9
+ from typing import BinaryIO, Union
13
10
 
14
11
  class ChunkrBase(HeadersMixin):
15
12
  """Base class with shared functionality for Chunkr API clients."""
@@ -30,140 +27,38 @@ class ChunkrBase(HeadersMixin):
30
27
 
31
28
  self.url = self.url.rstrip("/")
32
29
 
33
- def _prepare_file(
34
- self,
35
- file: Union[str, Path, BinaryIO, Image.Image]
36
- ) -> Tuple[str, BinaryIO]:
37
- """Convert various file types into a tuple of (filename, file-like object).
38
-
39
- Args:
40
- file: Input file, can be:
41
- - String or Path to a file
42
- - URL string starting with http:// or https://
43
- - Base64 string
44
- - Opened binary file (mode='rb')
45
- - PIL/Pillow Image object
46
-
47
- Returns:
48
- Tuple[str, BinaryIO]: (filename, file-like object) ready for upload
49
-
50
- Raises:
51
- FileNotFoundError: If the file path doesn't exist
52
- TypeError: If the file type is not supported
53
- ValueError: If the URL is invalid or unreachable
54
- ValueError: If the MIME type is unsupported
55
- """
56
- # Handle URLs
57
- if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
58
- response = requests.get(file)
59
- response.raise_for_status()
60
- file_obj = io.BytesIO(response.content)
61
- filename = Path(file.split('/')[-1]).name or 'downloaded_file'
62
- return filename, file_obj
63
-
64
- # Handle base64 strings
65
- if isinstance(file, str) and ',' in file and ';base64,' in file:
66
- try:
67
- # Split header and data
68
- header, base64_data = file.split(',', 1)
69
- import base64
70
- file_bytes = base64.b64decode(base64_data)
71
- file_obj = io.BytesIO(file_bytes)
72
-
73
- # Try to determine format from header
74
- format = 'bin'
75
- mime_type = header.split(':')[-1].split(';')[0].lower()
76
-
77
- # Map MIME types to file extensions
78
- mime_to_ext = {
79
- 'application/pdf': 'pdf',
80
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
81
- 'application/msword': 'doc',
82
- 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
83
- 'application/vnd.ms-powerpoint': 'ppt',
84
- 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
85
- 'application/vnd.ms-excel': 'xls',
86
- 'image/jpeg': 'jpg',
87
- 'image/png': 'png',
88
- 'image/jpg': 'jpg'
89
- }
90
-
91
- if mime_type in mime_to_ext:
92
- format = mime_to_ext[mime_type]
93
- else:
94
- raise ValueError(f"Unsupported MIME type: {mime_type}")
95
-
96
- return f"file.{format}", file_obj
97
- except Exception as e:
98
- raise ValueError(f"Invalid base64 string: {str(e)}")
99
-
100
- # Handle file paths
101
- if isinstance(file, (str, Path)):
102
- path = Path(file).resolve()
103
- if not path.exists():
104
- raise FileNotFoundError(f"File not found: {file}")
105
- return path.name, open(path, 'rb')
106
-
107
- # Handle PIL Images
108
- if isinstance(file, Image.Image):
109
- img_byte_arr = io.BytesIO()
110
- format = file.format or 'PNG'
111
- file.save(img_byte_arr, format=format)
112
- img_byte_arr.seek(0)
113
- return f"image.{format.lower()}", img_byte_arr
114
-
115
- # Handle file-like objects
116
- if hasattr(file, 'read') and hasattr(file, 'seek'):
117
- # Try to get the filename from the file object if possible
118
- name = getattr(file, 'name', 'document') if hasattr(file, 'name') else 'document'
119
- return Path(name).name, file
120
-
121
- raise TypeError(f"Unsupported file type: {type(file)}")
122
-
123
- def _prepare_upload_data(
124
- self,
125
- file: Union[str, Path, BinaryIO, Image.Image],
126
- config: Configuration = None
127
- ) -> Tuple[dict, dict]:
128
- """Prepare files and data dictionaries for upload.
30
+ @abstractmethod
31
+ def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
32
+ """Upload a file and wait for processing to complete.
129
33
 
130
- Args:
131
- file: The file to upload
132
- config: Optional configuration settings
133
-
134
- Returns:
135
- Tuple[dict, dict]: (files dict, data dict) ready for upload
34
+ Must be implemented by subclasses.
136
35
  """
137
- filename, file_obj = self._prepare_file(file)
138
- files = {"file": (filename, file_obj)}
139
- data = {}
140
-
141
- if config:
142
- config_dict = config.model_dump(mode="json", exclude_none=True)
143
- for key, value in config_dict.items():
144
- if isinstance(value, dict):
145
- files[key] = (None, json.dumps(value), 'application/json')
146
- else:
147
- data[key] = value
148
-
149
- return files, data
36
+ pass
150
37
 
151
38
  @abstractmethod
152
- def upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
153
- """Upload a file and wait for processing to complete.
39
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
40
+ """Update a task by its ID.
154
41
 
155
42
  Must be implemented by subclasses.
156
43
  """
157
44
  pass
158
45
 
159
46
  @abstractmethod
160
- def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
47
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
161
48
  """Upload a file for processing and immediately return the task response.
162
49
 
163
50
  Must be implemented by subclasses.
164
51
  """
165
52
  pass
166
53
 
54
+ @abstractmethod
55
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
56
+ """Update a task by its ID.
57
+
58
+ Must be implemented by subclasses.
59
+ """
60
+ pass
61
+
167
62
  @abstractmethod
168
63
  def get_task(self, task_id: str) -> TaskResponse:
169
64
  """Get a task response by its ID.
@@ -171,3 +66,20 @@ class ChunkrBase(HeadersMixin):
171
66
  Must be implemented by subclasses.
172
67
  """
173
68
  pass
69
+
70
+ @abstractmethod
71
+ def delete_task(self, task_id: str) -> None:
72
+ """Delete a task by its ID.
73
+
74
+ Must be implemented by subclasses.
75
+ """
76
+ pass
77
+
78
+ @abstractmethod
79
+ def cancel_task(self, task_id: str) -> None:
80
+ """Cancel a task by its ID.
81
+
82
+ Must be implemented by subclasses.
83
+ """
84
+ pass
85
+
chunkr_ai/api/chunkr.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  from PIL import Image
6
6
  import requests
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class Chunkr(ChunkrBase):
10
11
  """Chunkr API client"""
@@ -43,10 +44,23 @@ class Chunkr(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = self.start_upload(file, config)
47
+ task = self.create_task(file, config)
47
48
  return task.poll()
49
+
50
+ def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = self.update_task(task_id, config)
61
+ return task.poll()
62
+
63
+ def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll()`
51
65
 
52
66
  Args:
@@ -80,16 +94,35 @@ class Chunkr(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files= prepare_upload_data(file, config)
84
98
  r = self._session.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- data=data,
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
105
+
106
+ def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
+ """Update a task by its ID.
108
+
109
+ Args:
110
+ task_id: The ID of the task to update
111
+ config: The new configuration to use
92
112
 
113
+ Returns:
114
+ TaskResponse: The updated task response
115
+ """
116
+ files = prepare_upload_data(None, config)
117
+ r = self._session.patch(
118
+ f"{self.url}/api/v1/task/{task_id}",
119
+ files=files,
120
+ headers=self._headers()
121
+ )
122
+
123
+ r.raise_for_status()
124
+ return TaskResponse(**r.json()).with_client(self)
125
+
93
126
  def get_task(self, task_id: str) -> TaskResponse:
94
127
  """Get a task response by its ID.
95
128
 
@@ -106,3 +139,29 @@ class Chunkr(ChunkrBase):
106
139
  r.raise_for_status()
107
140
  return TaskResponse(**r.json()).with_client(self)
108
141
 
142
+
143
+ def delete_task(self, task_id: str) -> None:
144
+ """Delete a task by its ID.
145
+
146
+ Args:
147
+ task_id: The ID of the task to delete
148
+ """
149
+ r = self._session.delete(
150
+ f"{self.url}/api/v1/task/{task_id}",
151
+ headers=self._headers()
152
+ )
153
+ r.raise_for_status()
154
+
155
+ def cancel_task(self, task_id: str) -> None:
156
+ """Cancel a task by its ID.
157
+
158
+ Args:
159
+ task_id: The ID of the task to cancel
160
+ """
161
+ r = self._session.get(
162
+ f"{self.url}/api/v1/task/{task_id}/cancel",
163
+ headers=self._headers()
164
+ )
165
+ r.raise_for_status()
166
+
167
+
@@ -5,6 +5,7 @@ import httpx
5
5
  from pathlib import Path
6
6
  from PIL import Image
7
7
  from typing import Union, BinaryIO
8
+ from .misc import prepare_upload_data
8
9
 
9
10
  class ChunkrAsync(ChunkrBase):
10
11
  """Asynchronous Chunkr API client"""
@@ -43,10 +44,23 @@ class ChunkrAsync(ChunkrBase):
43
44
  Returns:
44
45
  TaskResponse: The completed task response
45
46
  """
46
- task = await self.start_upload(file, config)
47
+ task = await self.create_task(file, config)
47
48
  return await task.poll_async()
49
+
50
+ async def update(self, task_id: str, config: Configuration) -> TaskResponse:
51
+ """Update a task by its ID and wait for processing to complete.
52
+
53
+ Args:
54
+ task_id: The ID of the task to update
55
+ config: Configuration options for processing. Optional.
48
56
 
49
- async def start_upload(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
57
+ Returns:
58
+ TaskResponse: The updated task response
59
+ """
60
+ task = await self.update_task(task_id, config)
61
+ return await task.poll_async()
62
+
63
+ async def create_task(self, file: Union[str, Path, BinaryIO, Image.Image], config: Configuration = None) -> TaskResponse:
50
64
  """Upload a file for processing and immediately return the task response. It will not wait for processing to complete. To wait for the full processing to complete, use `task.poll_async()`.
51
65
 
52
66
  Args:
@@ -80,16 +94,26 @@ class ChunkrAsync(ChunkrBase):
80
94
  Returns:
81
95
  TaskResponse: The initial task response
82
96
  """
83
- files, data = self._prepare_upload_data(file, config)
97
+ files = prepare_upload_data(file, config)
84
98
  r = await self._client.post(
85
99
  f"{self.url}/api/v1/task",
86
100
  files=files,
87
- json=config.model_dump() if config else {},
88
101
  headers=self._headers()
89
102
  )
90
103
  r.raise_for_status()
91
104
  return TaskResponse(**r.json()).with_client(self)
92
105
 
106
+ async def update_task(self, task_id: str, config: Configuration) -> TaskResponse:
107
+ files = prepare_upload_data(None, config)
108
+ r = await self._client.patch(
109
+ f"{self.url}/api/v1/task/{task_id}",
110
+ files=files,
111
+ headers=self._headers()
112
+ )
113
+
114
+ r.raise_for_status()
115
+ return TaskResponse(**r.json()).with_client(self)
116
+
93
117
  async def get_task(self, task_id: str) -> TaskResponse:
94
118
  r = await self._client.get(
95
119
  f"{self.url}/api/v1/task/{task_id}",
@@ -97,7 +121,22 @@ class ChunkrAsync(ChunkrBase):
97
121
  )
98
122
  r.raise_for_status()
99
123
  return TaskResponse(**r.json()).with_client(self)
124
+
125
+ async def delete_task(self, task_id: str) -> None:
126
+ r = await self._client.delete(
127
+ f"{self.url}/api/v1/task/{task_id}",
128
+ headers=self._headers()
129
+ )
130
+ r.raise_for_status()
131
+
132
+ async def cancel_task(self, task_id: str) -> None:
133
+ r = await self._client.get(
134
+ f"{self.url}/api/v1/task/{task_id}/cancel",
135
+ headers=self._headers()
136
+ )
137
+ r.raise_for_status()
100
138
 
139
+
101
140
  async def __aenter__(self):
102
141
  return self
103
142
 
chunkr_ai/api/config.py CHANGED
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field, model_validator
1
+ from pydantic import BaseModel, Field, model_validator, ConfigDict
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict
4
4
 
@@ -10,30 +10,30 @@ class CroppingStrategy(str, Enum):
10
10
  ALL = "All"
11
11
  AUTO = "Auto"
12
12
 
13
- class LlmConfig(BaseModel):
14
- model: str
15
- prompt: str
16
- temperature: float = 0.0
17
-
18
13
  class GenerationConfig(BaseModel):
19
14
  html: Optional[GenerationStrategy] = None
20
- llm: Optional[LlmConfig] = None
15
+ llm: Optional[str] = None
21
16
  markdown: Optional[GenerationStrategy] = None
22
17
  crop_image: Optional[CroppingStrategy] = None
23
18
 
24
19
  class SegmentProcessing(BaseModel):
25
- title: Optional[GenerationConfig] = None
26
- section_header: Optional[GenerationConfig] = None
27
- text: Optional[GenerationConfig] = None
28
- list_item: Optional[GenerationConfig] = None
29
- table: Optional[GenerationConfig] = None
30
- picture: Optional[GenerationConfig] = None
31
- caption: Optional[GenerationConfig] = None
32
- formula: Optional[GenerationConfig] = None
33
- footnote: Optional[GenerationConfig] = None
34
- page_header: Optional[GenerationConfig] = None
35
- page_footer: Optional[GenerationConfig] = None
36
- page: Optional[GenerationConfig] = None
20
+ model_config = ConfigDict(
21
+ populate_by_name=True,
22
+ alias_generator=str.title
23
+ )
24
+
25
+ title: Optional[GenerationConfig] = Field(default=None, alias="Title")
26
+ section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
27
+ text: Optional[GenerationConfig] = Field(default=None, alias="Text")
28
+ list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
29
+ table: Optional[GenerationConfig] = Field(default=None, alias="Table")
30
+ picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
31
+ caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
32
+ formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
33
+ footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
34
+ page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
35
+ page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
36
+ page: Optional[GenerationConfig] = Field(default=None, alias="Page")
37
37
 
38
38
  class ChunkProcessing(BaseModel):
39
39
  target_length: Optional[int] = None
@@ -86,9 +86,9 @@ class Segment(BaseModel):
86
86
  bbox: BoundingBox
87
87
  content: str
88
88
  page_height: float
89
- html: Optional[str]
90
- image: Optional[str]
91
- markdown: Optional[str]
89
+ html: Optional[str] = None
90
+ image: Optional[str] = None
91
+ markdown: Optional[str] = None
92
92
  ocr: List[OCRResult]
93
93
  page_number: int
94
94
  page_width: float
@@ -104,8 +104,8 @@ class ExtractedJson(BaseModel):
104
104
  data: Dict
105
105
 
106
106
  class OutputResponse(BaseModel):
107
- chunks: List[Chunk] = []
108
- extracted_json: Optional[ExtractedJson]
107
+ chunks: List[Chunk]
108
+ extracted_json: Optional[ExtractedJson] = Field(default=None)
109
109
 
110
110
  class Model(str, Enum):
111
111
  FAST = "Fast"
chunkr_ai/api/misc.py ADDED
@@ -0,0 +1,106 @@
1
+ import io
2
+ import json
3
+ from pathlib import Path
4
+ from PIL import Image
5
+ import requests
6
+ from typing import Union, Tuple, BinaryIO, Optional
7
+ from .config import Configuration
8
+
9
+
10
+ def prepare_file(
11
+ file: Union[str, Path, BinaryIO, Image.Image]
12
+ ) -> Tuple[str, BinaryIO]:
13
+ """Convert various file types into a tuple of (filename, file-like object)."""
14
+ # Handle URLs
15
+ if isinstance(file, str) and (file.startswith('http://') or file.startswith('https://')):
16
+ response = requests.get(file)
17
+ response.raise_for_status()
18
+ file_obj = io.BytesIO(response.content)
19
+ filename = Path(file.split('/')[-1]).name or 'downloaded_file'
20
+ return filename, file_obj
21
+
22
+ # Handle base64 strings
23
+ if isinstance(file, str) and ',' in file and ';base64,' in file:
24
+ try:
25
+ # Split header and data
26
+ header, base64_data = file.split(',', 1)
27
+ import base64
28
+ file_bytes = base64.b64decode(base64_data)
29
+ file_obj = io.BytesIO(file_bytes)
30
+
31
+ # Try to determine format from header
32
+ format = 'bin'
33
+ mime_type = header.split(':')[-1].split(';')[0].lower()
34
+
35
+ # Map MIME types to file extensions
36
+ mime_to_ext = {
37
+ 'application/pdf': 'pdf',
38
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
39
+ 'application/msword': 'doc',
40
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
41
+ 'application/vnd.ms-powerpoint': 'ppt',
42
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
43
+ 'application/vnd.ms-excel': 'xls',
44
+ 'image/jpeg': 'jpg',
45
+ 'image/png': 'png',
46
+ 'image/jpg': 'jpg'
47
+ }
48
+
49
+ if mime_type in mime_to_ext:
50
+ format = mime_to_ext[mime_type]
51
+ else:
52
+ raise ValueError(f"Unsupported MIME type: {mime_type}")
53
+
54
+ return f"file.{format}", file_obj
55
+ except Exception as e:
56
+ raise ValueError(f"Invalid base64 string: {str(e)}")
57
+
58
+ # Handle file paths
59
+ if isinstance(file, (str, Path)):
60
+ path = Path(file).resolve()
61
+ if not path.exists():
62
+ raise FileNotFoundError(f"File not found: {file}")
63
+ return path.name, open(path, 'rb')
64
+
65
+ # Handle PIL Images
66
+ if isinstance(file, Image.Image):
67
+ img_byte_arr = io.BytesIO()
68
+ format = file.format or 'PNG'
69
+ file.save(img_byte_arr, format=format)
70
+ img_byte_arr.seek(0)
71
+ return f"image.{format.lower()}", img_byte_arr
72
+
73
+ # Handle file-like objects
74
+ if hasattr(file, 'read') and hasattr(file, 'seek'):
75
+ # Try to get the filename from the file object if possible
76
+ name = getattr(file, 'name', 'document') if hasattr(file, 'name') else 'document'
77
+ return Path(name).name, file
78
+
79
+ raise TypeError(f"Unsupported file type: {type(file)}")
80
+
81
+
82
+
83
+ def prepare_upload_data(
84
+ file: Optional[Union[str, Path, BinaryIO, Image.Image]] = None,
85
+ config: Optional[Configuration] = None
86
+ ) -> dict:
87
+ """Prepare files and data dictionaries for upload.
88
+
89
+ Args:
90
+ file: The file to upload
91
+ config: Optional configuration settings
92
+
93
+ Returns:
94
+ dict: (files dict) ready for upload
95
+ """
96
+ files = {}
97
+ if file:
98
+ filename, file_obj = prepare_file(file)
99
+ files = {"file": (filename, file_obj)}
100
+
101
+ if config:
102
+ config_dict = config.model_dump(mode="json", exclude_none=True)
103
+ for key, value in config_dict.items():
104
+ files[key] = (None, json.dumps(value), 'application/json')
105
+
106
+ return files
chunkr_ai/api/task.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from .protocol import ChunkrClientProtocol
2
2
  from .config import Configuration, OutputResponse
3
+ from .misc import prepare_upload_data
3
4
  import asyncio
4
5
  from datetime import datetime
5
6
  from enum import Enum
@@ -12,22 +13,23 @@ class Status(str, Enum):
12
13
  PROCESSING = "Processing"
13
14
  SUCCEEDED = "Succeeded"
14
15
  FAILED = "Failed"
16
+ CANCELLED = "Cancelled"
15
17
 
16
18
  class TaskResponse(BaseModel):
17
19
  configuration: Configuration
18
20
  created_at: datetime
19
- expires_at: Optional[datetime]
20
- file_name: Optional[str]
21
- finished_at: Optional[datetime]
22
- input_file_url: Optional[str]
21
+ expires_at: Optional[datetime] = None
22
+ file_name: Optional[str] = None
23
+ finished_at: Optional[datetime] = None
24
+ input_file_url: Optional[str] = None
23
25
  message: str
24
- output: Optional[OutputResponse]
25
- page_count: Optional[int]
26
- pdf_url: Optional[str]
27
- started_at: Optional[datetime]
26
+ output: Optional[OutputResponse] = None
27
+ page_count: Optional[int] = None
28
+ pdf_url: Optional[str] = None
29
+ started_at: Optional[datetime] = None
28
30
  status: Status
29
31
  task_id: str
30
- task_url: Optional[str]
32
+ task_url: Optional[str] = None
31
33
  _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
32
34
 
33
35
  def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
@@ -79,7 +81,8 @@ class TaskResponse(BaseModel):
79
81
  """Poll the task for completion."""
80
82
  while True:
81
83
  response = self._poll_request_sync()
82
- self.__dict__.update(response)
84
+ updated_task = TaskResponse(**response).with_client(self._client)
85
+ self.__dict__.update(updated_task.__dict__)
83
86
 
84
87
  if result := self._check_status():
85
88
  return result
@@ -90,7 +93,8 @@ class TaskResponse(BaseModel):
90
93
  """Poll the task for completion asynchronously."""
91
94
  while True:
92
95
  response = await self._poll_request_async()
93
- self.__dict__.update(response)
96
+ updated_task = TaskResponse(**response).with_client(self._client)
97
+ self.__dict__.update(updated_task.__dict__)
94
98
 
95
99
  if result := self._check_status():
96
100
  return result
@@ -108,6 +112,56 @@ class TaskResponse(BaseModel):
108
112
  if content:
109
113
  parts.append(content)
110
114
  return "\n".join(parts)
115
+
116
+ def update(self, config: Configuration) -> 'TaskResponse':
117
+ files = prepare_upload_data(None, config)
118
+ r = self._client._session.patch(
119
+ f"{self.task_url}",
120
+ files=files,
121
+ headers=self._client._headers()
122
+ )
123
+ r.raise_for_status()
124
+ return TaskResponse(**r.json()).with_client(self._client)
125
+
126
+ async def update_async(self, config: Configuration) -> 'TaskResponse':
127
+ files = prepare_upload_data(None, config)
128
+ r = await self._client._client.patch(
129
+ f"{self.task_url}",
130
+ files=files,
131
+ headers=self._client._headers()
132
+ )
133
+ r.raise_for_status()
134
+ return TaskResponse(**r.json()).with_client(self._client)
135
+
136
+ def cancel(self):
137
+ r = self._client._session.get(
138
+ f"{self.task_url}/cancel",
139
+ headers=self._client._headers()
140
+ )
141
+ r.raise_for_status()
142
+ self.poll()
143
+
144
+ async def cancel_async(self):
145
+ r = await self._client._client.get(
146
+ f"{self.task_url}/cancel",
147
+ headers=self._client._headers()
148
+ )
149
+ r.raise_for_status()
150
+ await self.poll_async()
151
+
152
+ def delete(self):
153
+ r = self._client._session.delete(
154
+ f"{self.task_url}",
155
+ headers=self._client._headers()
156
+ )
157
+ r.raise_for_status()
158
+
159
+ async def delete_async(self):
160
+ r = await self._client._client.delete(
161
+ f"{self.task_url}",
162
+ headers=self._client._headers()
163
+ )
164
+ r.raise_for_status()
111
165
 
112
166
  def html(self) -> str:
113
167
  """Get full HTML for the task"""
chunkr_ai/main.py CHANGED
@@ -0,0 +1,12 @@
1
+ from chunkr_ai.api.chunkr import Chunkr
2
+ from chunkr_ai.models import Configuration
3
+ from chunkr_ai.api.config import SegmentationStrategy, ChunkProcessing
4
+
5
+ if __name__ == "__main__":
6
+ chunkr = Chunkr()
7
+ task = chunkr.update_task("556b4fe5-e3f7-48dc-9f56-0fb7fbacdb87", Configuration(
8
+ chunk_processing=ChunkProcessing(
9
+ target_length=1000
10
+ )
11
+ ))
12
+ print(task)
chunkr_ai/models.py CHANGED
@@ -8,7 +8,6 @@ from .api.config import (
8
8
  GenerationStrategy,
9
9
  GenerationConfig,
10
10
  JsonSchema,
11
- LlmConfig,
12
11
  Model,
13
12
  OCRResult,
14
13
  OcrStrategy,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -0,0 +1,18 @@
1
+ chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
+ chunkr_ai/main.py,sha256=_MT1lcnNiXjVW9ZkZYl28SB_f6M9g_IOgZxvhodTzAo,394
3
+ chunkr_ai/models.py,sha256=T8_F-Y1US21ZJVzLIaroqp-Hd0_ZFbdkbEOxr63-PNE,827
4
+ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
+ chunkr_ai/api/base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
8
+ chunkr_ai/api/chunkr.py,sha256=PmrK37HbK2T1KUPitKnt4wZqIujL61Jo12qW9DEpNMI,5186
9
+ chunkr_ai/api/chunkr_async.py,sha256=2yYyAO9-j2xKQYH0fJb2S6gL26hgbtL4QyqlG9l0QBY,4893
10
+ chunkr_ai/api/config.py,sha256=XIqXZ_8q7U_BEmY5wyIC9mbQGZBw1956EN9yhC4svD0,4235
11
+ chunkr_ai/api/misc.py,sha256=tScsUUcrqeVh_bZv1YlbmjGkQSTDQN8NyKxoNwAG6XA,3792
12
+ chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
13
+ chunkr_ai/api/task.py,sha256=EB6RK8ms7EaNj57tNJZoNgNMHGWKXFhkQ1WC7gk5ht4,6059
14
+ chunkr_ai-0.0.8.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ chunkr_ai-0.0.8.dist-info/METADATA,sha256=tL3OZfFIRsgfIKoDYWAS89bZw48_0C8cdqHJ6_GrT7A,4844
16
+ chunkr_ai-0.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
17
+ chunkr_ai-0.0.8.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
18
+ chunkr_ai-0.0.8.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
- chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- chunkr_ai/models.py,sha256=kNeYtBO4TFvQWKFCent7tLEQjyKlVUieKNiuTt3u564,842
4
- chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
- chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
8
- chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
9
- chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
10
- chunkr_ai/api/config.py,sha256=K0s1giImciPksu-bO9gzRwUaK2Vo1nxNKQkXlRQ2cb8,3785
11
- chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
- chunkr_ai/api/task.py,sha256=_WOGRirlLEow_wS9kJB_dNYb2RvYE9nlu7Spq16AhME,4172
13
- chunkr_ai-0.0.6.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.6.dist-info/METADATA,sha256=TuBBU6n1g7kdLVky2vAx94TFWZVyu8PqQ_47vi6tN5E,4844
15
- chunkr_ai-0.0.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.6.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.6.dist-info/RECORD,,