chunkr-ai 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chunkr_ai/api/config.py CHANGED
@@ -142,4 +142,4 @@ class Status(str, Enum):
142
142
  PROCESSING = "Processing"
143
143
  SUCCEEDED = "Succeeded"
144
144
  FAILED = "Failed"
145
- CANCELLED = "Cancelled"
145
+ CANCELLED = "Cancelled"
chunkr_ai/api/task.py CHANGED
@@ -1,38 +1,10 @@
1
- from .protocol import ChunkrClientProtocol
2
- from .config import Configuration, OutputResponse, Status
1
+ from .config import Configuration
3
2
  from .misc import prepare_upload_data
4
- import asyncio
5
- from datetime import datetime
6
- from pydantic import BaseModel, PrivateAttr
3
+ from .task_base import TaskBase
7
4
  import time
8
- from typing import Optional, Union
9
-
10
- class TaskResponse(BaseModel):
11
- configuration: Configuration
12
- created_at: datetime
13
- expires_at: Optional[datetime] = None
14
- file_name: Optional[str] = None
15
- finished_at: Optional[datetime] = None
16
- input_file_url: Optional[str] = None
17
- message: str
18
- output: Optional[OutputResponse] = None
19
- page_count: Optional[int] = None
20
- pdf_url: Optional[str] = None
21
- started_at: Optional[datetime] = None
22
- status: Status
23
- task_id: str
24
- task_url: Optional[str] = None
25
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
26
-
27
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
28
- self._client = client
29
- return self
30
-
31
- def _poll_request_sync(self) -> dict:
32
- """Helper method to make polling request with retry logic (synchronous)"""
33
- if not self.task_url:
34
- raise ValueError("Task URL not found in response")
35
5
 
6
+ class TaskResponse(TaskBase):
7
+ def _poll_request(self) -> dict:
36
8
  while True:
37
9
  try:
38
10
  r = self._client._session.get(self.task_url, headers=self._client._headers())
@@ -44,68 +16,20 @@ class TaskResponse(BaseModel):
44
16
  except Exception as e:
45
17
  raise
46
18
 
47
- async def _poll_request_async(self) -> dict:
48
- """Helper method to make polling request with retry logic (asynchronous)"""
19
+ def poll(self) -> 'TaskResponse':
49
20
  if not self.task_url:
50
21
  raise ValueError("Task URL not found in response")
51
-
52
- while True:
53
- try:
54
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
55
- r.raise_for_status()
56
- response = r.json()
57
- return response
58
- except (ConnectionError, TimeoutError) as _:
59
- print("Connection error while polling the task, retrying...")
60
- await asyncio.sleep(0.5)
61
- except Exception as e:
62
- raise
63
-
64
- def _check_status(self) -> Optional['TaskResponse']:
65
- """Helper method to check task status and handle completion/failure"""
66
- if self.status == "Failed":
67
- raise ValueError(self.message)
68
- if self.status not in ("Starting", "Processing"):
69
- return self
70
- return None
71
-
72
- def poll(self) -> 'TaskResponse':
73
- """Poll the task for completion."""
74
22
  while True:
75
23
  response = self._poll_request_sync()
76
24
  updated_task = TaskResponse(**response).with_client(self._client)
77
25
  self.__dict__.update(updated_task.__dict__)
78
-
79
26
  if result := self._check_status():
80
27
  return result
81
-
82
28
  time.sleep(0.5)
83
-
84
- async def poll_async(self) -> 'TaskResponse':
85
- """Poll the task for completion asynchronously."""
86
- while True:
87
- response = await self._poll_request_async()
88
- updated_task = TaskResponse(**response).with_client(self._client)
89
- self.__dict__.update(updated_task.__dict__)
90
-
91
- if result := self._check_status():
92
- return result
93
-
94
- await asyncio.sleep(0.5)
95
-
96
- def _get_content(self, content_type: str) -> str:
97
- """Helper method to get either HTML, Markdown, or raw content."""
98
- if not self.output:
99
- return ""
100
- parts = []
101
- for c in self.output.chunks:
102
- for s in c.segments:
103
- content = getattr(s, content_type)
104
- if content:
105
- parts.append(content)
106
- return "\n".join(parts)
107
29
 
108
30
  def update(self, config: Configuration) -> 'TaskResponse':
31
+ if not self.task_url:
32
+ raise ValueError("Task URL not found")
109
33
  files = prepare_upload_data(None, config)
110
34
  r = self._client._session.patch(
111
35
  f"{self.task_url}",
@@ -113,56 +37,25 @@ class TaskResponse(BaseModel):
113
37
  headers=self._client._headers()
114
38
  )
115
39
  r.raise_for_status()
116
- return TaskResponse(**r.json()).with_client(self._client)
117
-
118
- async def update_async(self, config: Configuration) -> 'TaskResponse':
119
- files = prepare_upload_data(None, config)
120
- r = await self._client._client.patch(
121
- f"{self.task_url}",
122
- files=files,
123
- headers=self._client._headers()
124
- )
125
- r.raise_for_status()
126
- return TaskResponse(**r.json()).with_client(self._client)
40
+ updated = TaskResponse(**r.json()).with_client(self._client)
41
+ self.__dict__.update(updated.__dict__)
42
+ return self.poll()
127
43
 
128
44
  def cancel(self):
45
+ if not self.task_url:
46
+ raise ValueError("Task URL not found")
129
47
  r = self._client._session.get(
130
48
  f"{self.task_url}/cancel",
131
49
  headers=self._client._headers()
132
50
  )
133
51
  r.raise_for_status()
134
52
  self.poll()
135
-
136
- async def cancel_async(self):
137
- r = await self._client._client.get(
138
- f"{self.task_url}/cancel",
139
- headers=self._client._headers()
140
- )
141
- r.raise_for_status()
142
- await self.poll_async()
143
53
 
144
54
  def delete(self):
55
+ if not self.task_url:
56
+ raise ValueError("Task URL not found")
145
57
  r = self._client._session.delete(
146
- f"{self.task_url}",
58
+ self.task_url,
147
59
  headers=self._client._headers()
148
60
  )
149
61
  r.raise_for_status()
150
-
151
- async def delete_async(self):
152
- r = await self._client._client.delete(
153
- f"{self.task_url}",
154
- headers=self._client._headers()
155
- )
156
- r.raise_for_status()
157
-
158
- def html(self) -> str:
159
- """Get full HTML for the task"""
160
- return self._get_content("html")
161
-
162
- def markdown(self) -> str:
163
- """Get full markdown for the task"""
164
- return self._get_content("markdown")
165
-
166
- def content(self) -> str:
167
- """Get full text for the task"""
168
- return self._get_content("content")
@@ -1,34 +1,23 @@
1
- import asyncio
2
- from pydantic import BaseModel, PrivateAttr
3
- from datetime import datetime
4
- from typing import Optional, Union
5
- from .task_base import TaskBase
6
- from .protocol import ChunkrClientProtocol
7
- from .config import Configuration, OutputResponse, Status
1
+ from .config import Configuration
8
2
  from .misc import prepare_upload_data
3
+ from .task_base import TaskBase
4
+ import asyncio
9
5
 
10
- class TaskResponseAsync(BaseModel, TaskBase):
11
- configuration: Configuration
12
- created_at: datetime
13
- expires_at: Optional[datetime]
14
- file_name: Optional[str]
15
- finished_at: Optional[datetime]
16
- input_file_url: Optional[str]
17
- message: str
18
- output: Optional[OutputResponse]
19
- page_count: Optional[int]
20
- pdf_url: Optional[str]
21
- started_at: Optional[datetime]
22
- status: Status
23
- task_id: str
24
- task_url: Optional[str]
25
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
26
-
27
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponseAsync':
28
- self._client = client
29
- return self
6
+ class TaskResponseAsync(TaskBase):
7
+ async def _poll_request(self) -> dict:
8
+ try:
9
+ r = await self._client._client.get(self.task_url, headers=self._client._headers())
10
+ r.raise_for_status()
11
+ return r.json()
12
+ except (ConnectionError, TimeoutError) as _:
13
+ print("Connection error while polling the task, retrying...")
14
+ await asyncio.sleep(0.5)
15
+ except Exception as e:
16
+ raise
30
17
 
31
18
  async def poll(self) -> 'TaskResponseAsync':
19
+ if not self.task_url:
20
+ raise ValueError("Task URL not found")
32
21
  while True:
33
22
  j = await self._poll_request()
34
23
  updated = TaskResponseAsync(**j).with_client(self._client)
@@ -37,28 +26,6 @@ class TaskResponseAsync(BaseModel, TaskBase):
37
26
  return res
38
27
  await asyncio.sleep(0.5)
39
28
 
40
- async def _poll_request(self) -> dict:
41
- if not self.task_url:
42
- raise ValueError("Task URL not found")
43
- while True:
44
- try:
45
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
46
- r.raise_for_status()
47
- return r.json()
48
- except Exception as e:
49
- if self.status == Status.FAILED:
50
- raise ValueError(self.message) from e
51
- await asyncio.sleep(0.5)
52
-
53
- def _check_status(self) -> Optional['TaskResponseAsync']:
54
- if self.status == Status.FAILED:
55
- raise ValueError(f"Task failed: {self.message}")
56
- if self.status == Status.CANCELLED:
57
- return self
58
- if self.status not in [Status.STARTING, Status.PROCESSING]:
59
- return self
60
- return None
61
-
62
29
  async def update(self, config: Configuration) -> 'TaskResponseAsync':
63
30
  if not self.task_url:
64
31
  raise ValueError("Task URL not found")
@@ -77,27 +44,7 @@ class TaskResponseAsync(BaseModel, TaskBase):
77
44
  return await self.poll()
78
45
 
79
46
  async def delete(self):
47
+ if not self.task_url:
48
+ raise ValueError("Task URL not found")
80
49
  r = await self._client._client.delete(self.task_url, headers=self._client._headers())
81
- r.raise_for_status()
82
-
83
- def html(self) -> str:
84
- return self._get_content("html")
85
-
86
- def markdown(self) -> str:
87
- return self._get_content("markdown")
88
-
89
- def content(self) -> str:
90
- return self._get_content("content")
91
-
92
- def _get_content(self, t: str) -> str:
93
- if not self.output:
94
- return ""
95
- parts = []
96
- for c in self.output.chunks:
97
- for s in c.segments:
98
- v = getattr(s, t)
99
- if v:
100
- parts.append(v)
101
- return "\n".join(parts)
102
-
103
- # Satisfying TaskBase abstract methods with stubs
50
+ r.raise_for_status()
@@ -1,31 +1,83 @@
1
- from abc import ABC, abstractmethod
2
1
  from .config import Configuration
2
+ from .protocol import ChunkrClientProtocol
3
+ from ..models import Status, OutputResponse
4
+ from abc import ABC, abstractmethod
5
+ from typing import TypeVar, Optional, Generic, Union
6
+ from pydantic import BaseModel, PrivateAttr
7
+ from datetime import datetime
8
+
9
+ T = TypeVar('T', bound='TaskBase')
10
+
11
+ class TaskBase(BaseModel, ABC, Generic[T]):
12
+ configuration: Configuration
13
+ created_at: datetime
14
+ expires_at: Optional[datetime]
15
+ file_name: Optional[str]
16
+ finished_at: Optional[datetime]
17
+ input_file_url: Optional[str]
18
+ message: str
19
+ output: Optional[OutputResponse]
20
+ page_count: Optional[int]
21
+ pdf_url: Optional[str]
22
+ started_at: Optional[datetime]
23
+ status: Status
24
+ task_id: str
25
+ task_url: Optional[str]
26
+ _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
3
27
 
4
- class TaskBase(ABC):
5
28
  @abstractmethod
6
- def poll(self):
29
+ def _poll_request(self) -> dict:
30
+ """Helper method to make polling request with retry logic (synchronous)"""
7
31
  pass
8
32
 
9
33
  @abstractmethod
10
- def update(self, config: Configuration):
34
+ def poll(self) -> T:
35
+ """Poll the task for completion."""
11
36
  pass
12
37
 
13
38
  @abstractmethod
14
- def cancel(self):
39
+ def update(self, config: Configuration) -> T:
40
+ """Update the task configuration."""
15
41
  pass
16
42
 
17
43
  @abstractmethod
18
- def delete(self):
44
+ def cancel(self) -> T:
45
+ """Cancel the task."""
19
46
  pass
20
47
 
21
48
  @abstractmethod
22
- def html(self) -> str:
49
+ def delete(self) -> T:
50
+ """Delete the task."""
23
51
  pass
24
52
 
25
- @abstractmethod
53
+ def with_client(self, client: Union[ChunkrClientProtocol]) -> T:
54
+ self._client = client
55
+ return self
56
+
57
+ def _check_status(self) -> Optional[T]:
58
+ """Helper method to check task status and handle completion/failure"""
59
+ if self.status == "Failed":
60
+ raise ValueError(self.message)
61
+ if self.status not in ("Starting", "Processing"):
62
+ return self
63
+ return None
64
+
65
+ def html(self) -> str:
66
+ return self._get_content("html")
67
+
26
68
  def markdown(self) -> str:
27
- pass
69
+ return self._get_content("markdown")
28
70
 
29
- @abstractmethod
30
71
  def content(self) -> str:
31
- pass
72
+ return self._get_content("content")
73
+
74
+ def _get_content(self, t: str) -> str:
75
+ if not self.output:
76
+ return ""
77
+ parts = []
78
+ for c in self.output.chunks:
79
+ for s in c.segments:
80
+ v = getattr(s, t)
81
+ if v:
82
+ parts.append(v)
83
+ return "\n".join(parts)
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
+ Requires-Dist: httpx>=0.25.0
10
11
  Requires-Dist: pillow>=10.0.0
11
12
  Requires-Dist: pydantic>=2.0.0
12
13
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -5,15 +5,15 @@ chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
5
5
  chunkr_ai/api/chunkr.py,sha256=0qpV9b1hOpDhA9EuKkXW9X_laUmw5NY3ZYq0cUOTbww,5190
6
6
  chunkr_ai/api/chunkr_async.py,sha256=ZkLBrn4cqzu3sqMfS8cfZZgSvpdyQuWZP95lfGxuHx0,4900
7
7
  chunkr_ai/api/chunkr_base.py,sha256=IYO0pmoL02GchIggj6_Q5nvtAUoOvYAAvT7VLFU6scY,2506
8
- chunkr_ai/api/config.py,sha256=y6wZz01ihRJ_5_cK_JklFWn397yll7jfXntd8bBBa5s,4861
8
+ chunkr_ai/api/config.py,sha256=joTn7jiOlJXTwwza-jHauLV-39CMzaxZVGB9JBm8Cok,4862
9
9
  chunkr_ai/api/misc.py,sha256=9vnfrbJ7sFlZqwEIQ4NTMb5rhPOmETT7e1jR-b42PXM,4977
10
10
  chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
11
11
  chunkr_ai/api/schema.py,sha256=OeLOhBRXeRBgEImg0Q6O9Z10ojT6aSEVvwnDR8UeENo,4971
12
- chunkr_ai/api/task.py,sha256=Z5Da_Ijvih5rBz5ry98oAYNcJEDbQhhDWBQ35nHCRK4,5881
13
- chunkr_ai/api/task_async.py,sha256=o7tXvViIrdcrdclxaGzxrgIv-n-W8-twQ7XsDLXfXhM,3659
14
- chunkr_ai/api/task_base.py,sha256=Tkk7dhIeB3ic5M9g_b-MVRdNv4XQTvajpaUy8JylQ8A,526
15
- chunkr_ai-0.0.11.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- chunkr_ai-0.0.11.dist-info/METADATA,sha256=s8UeXDnBDVG_1RN5colcJCGhwrICRy9VMQWmTUKVRJc,4845
17
- chunkr_ai-0.0.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
- chunkr_ai-0.0.11.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
- chunkr_ai-0.0.11.dist-info/RECORD,,
12
+ chunkr_ai/api/task.py,sha256=4insrdGEVzBHs1ejZvde8bbEetVzgJELa47UjhfBqCA,2116
13
+ chunkr_ai/api/task_async.py,sha256=LqS-LL-mCOgfGsgvuSXhKkSEUM6MMro-EZHl_ZedQQk,1998
14
+ chunkr_ai/api/task_base.py,sha256=iS5UVIDEPIiDoWrn21Oh_dQurkd_hvKQ8ng32j6sGoA,2369
15
+ chunkr_ai-0.0.12.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ chunkr_ai-0.0.12.dist-info/METADATA,sha256=dfo9myRizW2A5W0H6FpIoBzHa4QxmEe3lsedPYhwjXM,4874
17
+ chunkr_ai-0.0.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
18
+ chunkr_ai-0.0.12.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
19
+ chunkr_ai-0.0.12.dist-info/RECORD,,