chunkr-ai 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- chunkr_ai/__init__.py +1 -2
- chunkr_ai/api/chunkr.py +46 -39
- chunkr_ai/api/chunkr_base.py +142 -8
- chunkr_ai/api/config.py +18 -45
- chunkr_ai/api/decorators.py +58 -0
- chunkr_ai/api/misc.py +0 -2
- chunkr_ai/api/protocol.py +0 -2
- chunkr_ai/api/task_response.py +119 -0
- chunkr_ai/models.py +3 -12
- {chunkr_ai-0.0.16.dist-info → chunkr_ai-0.0.18.dist-info}/METADATA +1 -2
- chunkr_ai-0.0.18.dist-info/RECORD +17 -0
- chunkr_ai/api/base.py +0 -183
- chunkr_ai/api/chunkr_async.py +0 -120
- chunkr_ai/api/schema.py +0 -136
- chunkr_ai/api/task.py +0 -66
- chunkr_ai/api/task_async.py +0 -69
- chunkr_ai/api/task_base.py +0 -85
- chunkr_ai-0.0.16.dist-info/RECORD +0 -21
- {chunkr_ai-0.0.16.dist-info → chunkr_ai-0.0.18.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.16.dist-info → chunkr_ai-0.0.18.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.16.dist-info → chunkr_ai-0.0.18.dist-info}/top_level.txt +0 -0
chunkr_ai/api/task.py
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
from .config import Configuration
|
2
|
-
from .misc import prepare_upload_data
|
3
|
-
from .task_base import TaskBase
|
4
|
-
import time
|
5
|
-
|
6
|
-
|
7
|
-
class TaskResponse(TaskBase):
    """Synchronous task handle.

    All network calls go through ``self._client._session`` and are sent to
    ``self.task_url`` with the headers produced by ``self._client._headers()``.
    """

    def _poll_request(self) -> dict:
        """Fetch the current task state, retrying after connection problems.

        Returns:
            The decoded JSON body returned by a GET on ``task_url``.

        Raises:
            ValueError: If ``task_url`` or the client session is missing.

        Any other exception (including HTTP errors raised by
        ``raise_for_status``) propagates to the caller unchanged.
        """
        # Preconditions do not change between retries, so check them once.
        if not self.task_url:
            raise ValueError("Task URL not found in response")
        if not self._client._session:
            raise ValueError("Client session not found")

        while True:
            try:
                r = self._client._session.get(
                    self.task_url, headers=self._client._headers()
                )
                r.raise_for_status()
                return r.json()
            except (ConnectionError, TimeoutError):
                # NOTE(review): only the builtin ConnectionError/TimeoutError
                # are retried; exceptions from the HTTP library that do not
                # derive from them propagate -- confirm against the client.
                print("Connection error while polling the task, retrying...")
                time.sleep(0.5)

    def poll(self) -> "TaskResponse":
        """Refresh this task in place until ``_check_status`` reports a result.

        Returns:
            This instance, once its status is no longer in progress.

        Raises:
            ValueError: Propagated from ``_check_status`` or ``_poll_request``.
        """
        while True:
            response = self._poll_request()
            updated_task = TaskResponse(**response).with_client(self._client)
            # Copy the refreshed fields onto this instance so existing
            # references held by the caller see the new state.
            self.__dict__.update(updated_task.__dict__)
            if result := self._check_status():
                return result
            time.sleep(0.5)

    def update(self, config: Configuration) -> "TaskResponse":
        """PATCH the task with a new configuration, then poll.

        Args:
            config: Configuration passed to ``prepare_upload_data``.

        Returns:
            This instance after polling has finished.

        Raises:
            ValueError: If ``task_url`` or the client session is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._session:
            raise ValueError("Client session not found")
        files = prepare_upload_data(None, config)
        r = self._client._session.patch(
            self.task_url, files=files, headers=self._client._headers()
        )
        r.raise_for_status()
        updated = TaskResponse(**r.json()).with_client(self._client)
        self.__dict__.update(updated.__dict__)
        return self.poll()

    def cancel(self) -> "TaskResponse":
        """Request cancellation via ``{task_url}/cancel``, then poll.

        Returns:
            This instance after polling has finished. (Previously the poll
            result was discarded; it is now returned, matching the async
            variant and the base-class signature.)

        Raises:
            ValueError: If ``task_url`` or the client session is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._session:
            raise ValueError("Client session not found")
        r = self._client._session.get(
            f"{self.task_url}/cancel", headers=self._client._headers()
        )
        r.raise_for_status()
        return self.poll()

    def delete(self):
        """Send a DELETE request for the task.

        Raises:
            ValueError: If ``task_url`` or the client session is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._session:
            raise ValueError("Client session not found")
        r = self._client._session.delete(self.task_url, headers=self._client._headers())
        r.raise_for_status()
|
chunkr_ai/api/task_async.py
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
from .config import Configuration
|
2
|
-
from .misc import prepare_upload_data
|
3
|
-
from .task_base import TaskBase
|
4
|
-
import asyncio
|
5
|
-
|
6
|
-
|
7
|
-
class TaskResponseAsync(TaskBase):
    """Asynchronous task handle.

    All network calls are awaited on ``self._client._client`` and are sent to
    ``self.task_url`` with the headers produced by ``self._client._headers()``.
    """

    async def _poll_request(self) -> dict:
        """Fetch the current task state, retrying after connection problems.

        The request is repeated until it succeeds. Without the loop, a
        connection error would make this coroutine fall through and return
        ``None``, which then breaks ``poll`` when it unpacks the result.

        Returns:
            The decoded JSON body returned by a GET on ``task_url``.

        Raises:
            ValueError: If the underlying client is missing.

        Any other exception (including HTTP errors raised by
        ``raise_for_status``) propagates to the caller unchanged.
        """
        if not self._client._client:
            raise ValueError("Client not found")

        while True:
            try:
                r = await self._client._client.get(
                    self.task_url, headers=self._client._headers()
                )
                r.raise_for_status()
                return r.json()
            except (ConnectionError, TimeoutError):
                # NOTE(review): only the builtin ConnectionError/TimeoutError
                # are retried; exceptions from the HTTP library that do not
                # derive from them propagate -- confirm against the client.
                print("Connection error while polling the task, retrying...")
                await asyncio.sleep(0.5)

    async def poll(self) -> "TaskResponseAsync":
        """Refresh this task in place until ``_check_status`` reports a result.

        Returns:
            This instance, once its status is no longer in progress.

        Raises:
            ValueError: If ``task_url`` or the client is missing, or
                propagated from ``_check_status``.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._client:
            raise ValueError("Client not found")
        while True:
            j = await self._poll_request()
            updated = TaskResponseAsync(**j).with_client(self._client)
            # Copy the refreshed fields onto this instance so existing
            # references held by the caller see the new state.
            self.__dict__.update(updated.__dict__)
            if res := self._check_status():
                return res
            await asyncio.sleep(0.5)

    async def update(self, config: Configuration) -> "TaskResponseAsync":
        """PATCH the task with a new configuration, then poll.

        Args:
            config: Configuration passed to ``prepare_upload_data``.

        Returns:
            This instance after polling has finished.

        Raises:
            ValueError: If ``task_url`` or the client is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._client:
            raise ValueError("Client not found")
        f = prepare_upload_data(None, config)
        r = await self._client._client.patch(
            self.task_url, files=f, headers=self._client._headers()
        )
        r.raise_for_status()
        updated = TaskResponseAsync(**r.json()).with_client(self._client)
        self.__dict__.update(updated.__dict__)
        return await self.poll()

    async def cancel(self):
        """Request cancellation via ``{task_url}/cancel``, then poll.

        Returns:
            This instance after polling has finished.

        Raises:
            ValueError: If ``task_url`` or the client is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._client:
            raise ValueError("Client not found")
        r = await self._client._client.get(
            f"{self.task_url}/cancel", headers=self._client._headers()
        )
        r.raise_for_status()
        return await self.poll()

    async def delete(self):
        """Send a DELETE request for the task.

        Raises:
            ValueError: If ``task_url`` or the client is missing.
        """
        if not self.task_url:
            raise ValueError("Task URL not found")
        if not self._client._client:
            raise ValueError("Client not found")
        r = await self._client._client.delete(
            self.task_url, headers=self._client._headers()
        )
        r.raise_for_status()
|
chunkr_ai/api/task_base.py
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
from .config import Configuration, Status, OutputResponse
|
2
|
-
from .protocol import ChunkrClientProtocol
|
3
|
-
from abc import ABC, abstractmethod
|
4
|
-
from typing import TypeVar, Optional, Generic
|
5
|
-
from pydantic import BaseModel, PrivateAttr
|
6
|
-
from datetime import datetime
|
7
|
-
|
8
|
-
T = TypeVar("T", bound="TaskBase")


class TaskBase(BaseModel, ABC, Generic[T]):
    """Shared model for task responses.

    Holds the task fields and the content helpers; subclasses supply the
    transport-specific ``_poll_request``, ``poll``, ``update``, ``cancel``
    and ``delete`` implementations.
    """

    configuration: Configuration
    created_at: datetime
    expires_at: Optional[datetime]
    file_name: Optional[str]
    finished_at: Optional[datetime]
    input_file_url: Optional[str]
    message: str
    output: Optional[OutputResponse]
    page_count: Optional[int]
    pdf_url: Optional[str]
    started_at: Optional[datetime]
    status: Status
    task_id: str
    task_url: Optional[str]
    # Client used by subclasses for requests; attached via with_client().
    _client: Optional[ChunkrClientProtocol] = PrivateAttr(default=None)

    @abstractmethod
    def _poll_request(self) -> dict:
        """Perform one polling request (with retry handling) and return its payload."""

    @abstractmethod
    def poll(self) -> T:
        """Poll the task until it is complete."""

    @abstractmethod
    def update(self, config: Configuration) -> T:
        """Apply a new configuration to the task."""

    @abstractmethod
    def cancel(self) -> T:
        """Cancel the task."""

    @abstractmethod
    def delete(self) -> T:
        """Delete the task."""

    def with_client(self, client: ChunkrClientProtocol) -> T:
        """Attach ``client`` to this task and return the task itself."""
        self._client = client
        return self

    def _check_status(self) -> Optional[T]:
        """Interpret the current status.

        Returns:
            ``None`` while the status is "Starting" or "Processing",
            otherwise this instance.

        Raises:
            ValueError: With the task's message when the status is "Failed".
        """
        current = self.status
        if current == "Failed":
            raise ValueError(self.message)
        in_progress = current in ("Starting", "Processing")
        return None if in_progress else self

    def html(self) -> str:
        """Return the ``html`` attribute of every segment, joined by newlines."""
        return self._get_content("html")

    def markdown(self) -> str:
        """Return the ``markdown`` attribute of every segment, joined by newlines."""
        return self._get_content("markdown")

    def content(self) -> str:
        """Return the ``content`` attribute of every segment, joined by newlines."""
        return self._get_content("content")

    def _get_content(self, t: str) -> str:
        """Collect attribute ``t`` from each segment of each output chunk.

        Falsy values are skipped. Returns an empty string when there is no
        output.
        """
        if not self.output:
            return ""
        values = (
            getattr(segment, t)
            for chunk in self.output.chunks
            for segment in chunk.segments
        )
        return "\n".join(value for value in values if value)
|
@@ -1,21 +0,0 @@
|
|
1
|
-
chunkr_ai/__init__.py,sha256=q5YosvCNXPNGjV10pZY1gcvdosqUh38nVQTQA9g8EuM,110
|
2
|
-
chunkr_ai/models.py,sha256=iy8bqswbiNf-rCwmDnAF2jkZT7apOpxIxUMX3fWiE7k,924
|
3
|
-
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
6
|
-
chunkr_ai/api/base.py,sha256=QvHl8FInKHYKPLWDeEPpCchB1uktzOwTW7iPnyXccUc,6449
|
7
|
-
chunkr_ai/api/chunkr.py,sha256=0extAWVeZtI7B-g14smTfFZD_csdJNCcVNXx2_L69OQ,2617
|
8
|
-
chunkr_ai/api/chunkr_async.py,sha256=aa0s_tnYoujHBsfe8uLiPpVEnb2l9A3CXwPP34w9Mk8,4127
|
9
|
-
chunkr_ai/api/chunkr_base.py,sha256=k34Dyt1f21NBWZvZJ3w6Svvpg4SKnzr2ldGQ4ib96Wc,4951
|
10
|
-
chunkr_ai/api/config.py,sha256=TWl0Az6acKQCS1LIpKD4qr_lQ_63wqQ5M6calpLOlDM,5040
|
11
|
-
chunkr_ai/api/misc.py,sha256=bQpURc7soT5GL2ZpY7EiYyvPYWEzDM9qaX-UHa-oFeI,4909
|
12
|
-
chunkr_ai/api/protocol.py,sha256=lxIR_qoCA2a1OXjpq3LrWMdS0jRHct1bEmBlUzV8gvE,526
|
13
|
-
chunkr_ai/api/schema.py,sha256=yYesvueGgtmRa7Fi_Tpdv8A2bzHlx-B-5DxRAPlaDHo,4926
|
14
|
-
chunkr_ai/api/task.py,sha256=28J4dR8BDjvtkh3CQjW_YUEkgPXhCHBGu0wH6AQKKuE,2474
|
15
|
-
chunkr_ai/api/task_async.py,sha256=K5hTEOnmD42snPZg_JtJsVWg6QBUFZ1aBz1Abwv58-A,2529
|
16
|
-
chunkr_ai/api/task_base.py,sha256=KLiMhvvbCgcilguQKrtEPMlNs8oaatfQUtn8pYt9t6g,2467
|
17
|
-
chunkr_ai-0.0.16.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
chunkr_ai-0.0.16.dist-info/METADATA,sha256=ZZnNe88CL56fIw5-3UouYX0I10N9D7NXGzoBQmtd-oE,4839
|
19
|
-
chunkr_ai-0.0.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
20
|
-
chunkr_ai-0.0.16.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
21
|
-
chunkr_ai-0.0.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|