chunkr-ai 0.0.11__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {chunkr_ai-0.0.11/src/chunkr_ai.egg-info → chunkr_ai-0.0.12}/PKG-INFO +2 -1
  2. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/pyproject.toml +2 -1
  3. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/config.py +1 -1
  4. chunkr_ai-0.0.12/src/chunkr_ai/api/task.py +61 -0
  5. chunkr_ai-0.0.12/src/chunkr_ai/api/task_async.py +50 -0
  6. chunkr_ai-0.0.12/src/chunkr_ai/api/task_base.py +83 -0
  7. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12/src/chunkr_ai.egg-info}/PKG-INFO +2 -1
  8. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai.egg-info/requires.txt +1 -0
  9. chunkr_ai-0.0.11/src/chunkr_ai/api/task.py +0 -168
  10. chunkr_ai-0.0.11/src/chunkr_ai/api/task_async.py +0 -103
  11. chunkr_ai-0.0.11/src/chunkr_ai/api/task_base.py +0 -31
  12. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/LICENSE +0 -0
  13. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/README.md +0 -0
  14. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/setup.cfg +0 -0
  15. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/__init__.py +0 -0
  16. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/__init__.py +0 -0
  17. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/auth.py +0 -0
  18. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/chunkr.py +0 -0
  19. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/chunkr_async.py +0 -0
  20. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/chunkr_base.py +0 -0
  21. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/misc.py +0 -0
  22. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/protocol.py +0 -0
  23. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/api/schema.py +0 -0
  24. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai/models.py +0 -0
  25. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  26. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  27. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/src/chunkr_ai.egg-info/top_level.txt +0 -0
  28. {chunkr_ai-0.0.11 → chunkr_ai-0.0.12}/tests/test_chunkr.py +0 -0
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
+ Requires-Dist: httpx>=0.25.0
10
11
  Requires-Dist: pillow>=10.0.0
11
12
  Requires-Dist: pydantic>=2.0.0
12
13
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -4,13 +4,14 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.11"
7
+ version = "0.0.12"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
11
11
  license = {"file" = "LICENSE"}
12
12
  urls = {Homepage = "https://chunkr.ai"}
13
13
  dependencies = [
14
+ "httpx>=0.25.0",
14
15
  "httpx>=0.25.0",
15
16
  "pillow>=10.0.0",
16
17
  "pydantic>=2.0.0",
@@ -142,4 +142,4 @@ class Status(str, Enum):
142
142
  PROCESSING = "Processing"
143
143
  SUCCEEDED = "Succeeded"
144
144
  FAILED = "Failed"
145
- CANCELLED = "Cancelled"
145
+ CANCELLED = "Cancelled"
@@ -0,0 +1,61 @@
1
+ from .config import Configuration
2
+ from .misc import prepare_upload_data
3
+ from .task_base import TaskBase
4
+ import time
5
+
6
+ class TaskResponse(TaskBase):
7
+ def _poll_request(self) -> dict:
8
+ while True:
9
+ try:
10
+ r = self._client._session.get(self.task_url, headers=self._client._headers())
11
+ r.raise_for_status()
12
+ return r.json()
13
+ except (ConnectionError, TimeoutError) as _:
14
+ print("Connection error while polling the task, retrying...")
15
+ time.sleep(0.5)
16
+ except Exception as e:
17
+ raise
18
+
19
+ def poll(self) -> 'TaskResponse':
20
+ if not self.task_url:
21
+ raise ValueError("Task URL not found in response")
22
+ while True:
23
+ response = self._poll_request_sync()
24
+ updated_task = TaskResponse(**response).with_client(self._client)
25
+ self.__dict__.update(updated_task.__dict__)
26
+ if result := self._check_status():
27
+ return result
28
+ time.sleep(0.5)
29
+
30
+ def update(self, config: Configuration) -> 'TaskResponse':
31
+ if not self.task_url:
32
+ raise ValueError("Task URL not found")
33
+ files = prepare_upload_data(None, config)
34
+ r = self._client._session.patch(
35
+ f"{self.task_url}",
36
+ files=files,
37
+ headers=self._client._headers()
38
+ )
39
+ r.raise_for_status()
40
+ updated = TaskResponse(**r.json()).with_client(self._client)
41
+ self.__dict__.update(updated.__dict__)
42
+ return self.poll()
43
+
44
+ def cancel(self):
45
+ if not self.task_url:
46
+ raise ValueError("Task URL not found")
47
+ r = self._client._session.get(
48
+ f"{self.task_url}/cancel",
49
+ headers=self._client._headers()
50
+ )
51
+ r.raise_for_status()
52
+ self.poll()
53
+
54
+ def delete(self):
55
+ if not self.task_url:
56
+ raise ValueError("Task URL not found")
57
+ r = self._client._session.delete(
58
+ self.task_url,
59
+ headers=self._client._headers()
60
+ )
61
+ r.raise_for_status()
@@ -0,0 +1,50 @@
1
+ from .config import Configuration
2
+ from .misc import prepare_upload_data
3
+ from .task_base import TaskBase
4
+ import asyncio
5
+
6
+ class TaskResponseAsync(TaskBase):
7
+ async def _poll_request(self) -> dict:
8
+ try:
9
+ r = await self._client._client.get(self.task_url, headers=self._client._headers())
10
+ r.raise_for_status()
11
+ return r.json()
12
+ except (ConnectionError, TimeoutError) as _:
13
+ print("Connection error while polling the task, retrying...")
14
+ await asyncio.sleep(0.5)
15
+ except Exception as e:
16
+ raise
17
+
18
+ async def poll(self) -> 'TaskResponseAsync':
19
+ if not self.task_url:
20
+ raise ValueError("Task URL not found")
21
+ while True:
22
+ j = await self._poll_request()
23
+ updated = TaskResponseAsync(**j).with_client(self._client)
24
+ self.__dict__.update(updated.__dict__)
25
+ if res := self._check_status():
26
+ return res
27
+ await asyncio.sleep(0.5)
28
+
29
+ async def update(self, config: Configuration) -> 'TaskResponseAsync':
30
+ if not self.task_url:
31
+ raise ValueError("Task URL not found")
32
+ f = prepare_upload_data(None, config)
33
+ r = await self._client._client.patch(self.task_url, files=f, headers=self._client._headers())
34
+ r.raise_for_status()
35
+ updated = TaskResponseAsync(**r.json()).with_client(self._client)
36
+ self.__dict__.update(updated.__dict__)
37
+ return await self.poll()
38
+
39
+ async def cancel(self):
40
+ if not self.task_url:
41
+ raise ValueError("Task URL not found")
42
+ r = await self._client._client.get(f"{self.task_url}/cancel", headers=self._client._headers())
43
+ r.raise_for_status()
44
+ return await self.poll()
45
+
46
+ async def delete(self):
47
+ if not self.task_url:
48
+ raise ValueError("Task URL not found")
49
+ r = await self._client._client.delete(self.task_url, headers=self._client._headers())
50
+ r.raise_for_status()
@@ -0,0 +1,83 @@
1
+ from .config import Configuration
2
+ from .protocol import ChunkrClientProtocol
3
+ from ..models import Status, OutputResponse
4
+ from abc import ABC, abstractmethod
5
+ from typing import TypeVar, Optional, Generic, Union
6
+ from pydantic import BaseModel, PrivateAttr
7
+ from datetime import datetime
8
+
9
+ T = TypeVar('T', bound='TaskBase')
10
+
11
+ class TaskBase(BaseModel, ABC, Generic[T]):
12
+ configuration: Configuration
13
+ created_at: datetime
14
+ expires_at: Optional[datetime]
15
+ file_name: Optional[str]
16
+ finished_at: Optional[datetime]
17
+ input_file_url: Optional[str]
18
+ message: str
19
+ output: Optional[OutputResponse]
20
+ page_count: Optional[int]
21
+ pdf_url: Optional[str]
22
+ started_at: Optional[datetime]
23
+ status: Status
24
+ task_id: str
25
+ task_url: Optional[str]
26
+ _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
27
+
28
+ @abstractmethod
29
+ def _poll_request(self) -> dict:
30
+ """Helper method to make polling request with retry logic (synchronous)"""
31
+ pass
32
+
33
+ @abstractmethod
34
+ def poll(self) -> T:
35
+ """Poll the task for completion."""
36
+ pass
37
+
38
+ @abstractmethod
39
+ def update(self, config: Configuration) -> T:
40
+ """Update the task configuration."""
41
+ pass
42
+
43
+ @abstractmethod
44
+ def cancel(self) -> T:
45
+ """Cancel the task."""
46
+ pass
47
+
48
+ @abstractmethod
49
+ def delete(self) -> T:
50
+ """Delete the task."""
51
+ pass
52
+
53
+ def with_client(self, client: Union[ChunkrClientProtocol]) -> T:
54
+ self._client = client
55
+ return self
56
+
57
+ def _check_status(self) -> Optional[T]:
58
+ """Helper method to check task status and handle completion/failure"""
59
+ if self.status == "Failed":
60
+ raise ValueError(self.message)
61
+ if self.status not in ("Starting", "Processing"):
62
+ return self
63
+ return None
64
+
65
+ def html(self) -> str:
66
+ return self._get_content("html")
67
+
68
+ def markdown(self) -> str:
69
+ return self._get_content("markdown")
70
+
71
+ def content(self) -> str:
72
+ return self._get_content("content")
73
+
74
+ def _get_content(self, t: str) -> str:
75
+ if not self.output:
76
+ return ""
77
+ parts = []
78
+ for c in self.output.chunks:
79
+ for s in c.segments:
80
+ v = getattr(s, t)
81
+ if v:
82
+ parts.append(v)
83
+ return "\n".join(parts)
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: httpx>=0.25.0
10
+ Requires-Dist: httpx>=0.25.0
10
11
  Requires-Dist: pillow>=10.0.0
11
12
  Requires-Dist: pydantic>=2.0.0
12
13
  Requires-Dist: pytest-asyncio>=0.21.0
@@ -1,4 +1,5 @@
1
1
  httpx>=0.25.0
2
+ httpx>=0.25.0
2
3
  pillow>=10.0.0
3
4
  pydantic>=2.0.0
4
5
  pytest-asyncio>=0.21.0
@@ -1,168 +0,0 @@
1
- from .protocol import ChunkrClientProtocol
2
- from .config import Configuration, OutputResponse, Status
3
- from .misc import prepare_upload_data
4
- import asyncio
5
- from datetime import datetime
6
- from pydantic import BaseModel, PrivateAttr
7
- import time
8
- from typing import Optional, Union
9
-
10
- class TaskResponse(BaseModel):
11
- configuration: Configuration
12
- created_at: datetime
13
- expires_at: Optional[datetime] = None
14
- file_name: Optional[str] = None
15
- finished_at: Optional[datetime] = None
16
- input_file_url: Optional[str] = None
17
- message: str
18
- output: Optional[OutputResponse] = None
19
- page_count: Optional[int] = None
20
- pdf_url: Optional[str] = None
21
- started_at: Optional[datetime] = None
22
- status: Status
23
- task_id: str
24
- task_url: Optional[str] = None
25
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
26
-
27
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
28
- self._client = client
29
- return self
30
-
31
- def _poll_request_sync(self) -> dict:
32
- """Helper method to make polling request with retry logic (synchronous)"""
33
- if not self.task_url:
34
- raise ValueError("Task URL not found in response")
35
-
36
- while True:
37
- try:
38
- r = self._client._session.get(self.task_url, headers=self._client._headers())
39
- r.raise_for_status()
40
- return r.json()
41
- except (ConnectionError, TimeoutError) as _:
42
- print("Connection error while polling the task, retrying...")
43
- time.sleep(0.5)
44
- except Exception as e:
45
- raise
46
-
47
- async def _poll_request_async(self) -> dict:
48
- """Helper method to make polling request with retry logic (asynchronous)"""
49
- if not self.task_url:
50
- raise ValueError("Task URL not found in response")
51
-
52
- while True:
53
- try:
54
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
55
- r.raise_for_status()
56
- response = r.json()
57
- return response
58
- except (ConnectionError, TimeoutError) as _:
59
- print("Connection error while polling the task, retrying...")
60
- await asyncio.sleep(0.5)
61
- except Exception as e:
62
- raise
63
-
64
- def _check_status(self) -> Optional['TaskResponse']:
65
- """Helper method to check task status and handle completion/failure"""
66
- if self.status == "Failed":
67
- raise ValueError(self.message)
68
- if self.status not in ("Starting", "Processing"):
69
- return self
70
- return None
71
-
72
- def poll(self) -> 'TaskResponse':
73
- """Poll the task for completion."""
74
- while True:
75
- response = self._poll_request_sync()
76
- updated_task = TaskResponse(**response).with_client(self._client)
77
- self.__dict__.update(updated_task.__dict__)
78
-
79
- if result := self._check_status():
80
- return result
81
-
82
- time.sleep(0.5)
83
-
84
- async def poll_async(self) -> 'TaskResponse':
85
- """Poll the task for completion asynchronously."""
86
- while True:
87
- response = await self._poll_request_async()
88
- updated_task = TaskResponse(**response).with_client(self._client)
89
- self.__dict__.update(updated_task.__dict__)
90
-
91
- if result := self._check_status():
92
- return result
93
-
94
- await asyncio.sleep(0.5)
95
-
96
- def _get_content(self, content_type: str) -> str:
97
- """Helper method to get either HTML, Markdown, or raw content."""
98
- if not self.output:
99
- return ""
100
- parts = []
101
- for c in self.output.chunks:
102
- for s in c.segments:
103
- content = getattr(s, content_type)
104
- if content:
105
- parts.append(content)
106
- return "\n".join(parts)
107
-
108
- def update(self, config: Configuration) -> 'TaskResponse':
109
- files = prepare_upload_data(None, config)
110
- r = self._client._session.patch(
111
- f"{self.task_url}",
112
- files=files,
113
- headers=self._client._headers()
114
- )
115
- r.raise_for_status()
116
- return TaskResponse(**r.json()).with_client(self._client)
117
-
118
- async def update_async(self, config: Configuration) -> 'TaskResponse':
119
- files = prepare_upload_data(None, config)
120
- r = await self._client._client.patch(
121
- f"{self.task_url}",
122
- files=files,
123
- headers=self._client._headers()
124
- )
125
- r.raise_for_status()
126
- return TaskResponse(**r.json()).with_client(self._client)
127
-
128
- def cancel(self):
129
- r = self._client._session.get(
130
- f"{self.task_url}/cancel",
131
- headers=self._client._headers()
132
- )
133
- r.raise_for_status()
134
- self.poll()
135
-
136
- async def cancel_async(self):
137
- r = await self._client._client.get(
138
- f"{self.task_url}/cancel",
139
- headers=self._client._headers()
140
- )
141
- r.raise_for_status()
142
- await self.poll_async()
143
-
144
- def delete(self):
145
- r = self._client._session.delete(
146
- f"{self.task_url}",
147
- headers=self._client._headers()
148
- )
149
- r.raise_for_status()
150
-
151
- async def delete_async(self):
152
- r = await self._client._client.delete(
153
- f"{self.task_url}",
154
- headers=self._client._headers()
155
- )
156
- r.raise_for_status()
157
-
158
- def html(self) -> str:
159
- """Get full HTML for the task"""
160
- return self._get_content("html")
161
-
162
- def markdown(self) -> str:
163
- """Get full markdown for the task"""
164
- return self._get_content("markdown")
165
-
166
- def content(self) -> str:
167
- """Get full text for the task"""
168
- return self._get_content("content")
@@ -1,103 +0,0 @@
1
- import asyncio
2
- from pydantic import BaseModel, PrivateAttr
3
- from datetime import datetime
4
- from typing import Optional, Union
5
- from .task_base import TaskBase
6
- from .protocol import ChunkrClientProtocol
7
- from .config import Configuration, OutputResponse, Status
8
- from .misc import prepare_upload_data
9
-
10
- class TaskResponseAsync(BaseModel, TaskBase):
11
- configuration: Configuration
12
- created_at: datetime
13
- expires_at: Optional[datetime]
14
- file_name: Optional[str]
15
- finished_at: Optional[datetime]
16
- input_file_url: Optional[str]
17
- message: str
18
- output: Optional[OutputResponse]
19
- page_count: Optional[int]
20
- pdf_url: Optional[str]
21
- started_at: Optional[datetime]
22
- status: Status
23
- task_id: str
24
- task_url: Optional[str]
25
- _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
26
-
27
- def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponseAsync':
28
- self._client = client
29
- return self
30
-
31
- async def poll(self) -> 'TaskResponseAsync':
32
- while True:
33
- j = await self._poll_request()
34
- updated = TaskResponseAsync(**j).with_client(self._client)
35
- self.__dict__.update(updated.__dict__)
36
- if res := self._check_status():
37
- return res
38
- await asyncio.sleep(0.5)
39
-
40
- async def _poll_request(self) -> dict:
41
- if not self.task_url:
42
- raise ValueError("Task URL not found")
43
- while True:
44
- try:
45
- r = await self._client._client.get(self.task_url, headers=self._client._headers())
46
- r.raise_for_status()
47
- return r.json()
48
- except Exception as e:
49
- if self.status == Status.FAILED:
50
- raise ValueError(self.message) from e
51
- await asyncio.sleep(0.5)
52
-
53
- def _check_status(self) -> Optional['TaskResponseAsync']:
54
- if self.status == Status.FAILED:
55
- raise ValueError(f"Task failed: {self.message}")
56
- if self.status == Status.CANCELLED:
57
- return self
58
- if self.status not in [Status.STARTING, Status.PROCESSING]:
59
- return self
60
- return None
61
-
62
- async def update(self, config: Configuration) -> 'TaskResponseAsync':
63
- if not self.task_url:
64
- raise ValueError("Task URL not found")
65
- f = prepare_upload_data(None, config)
66
- r = await self._client._client.patch(self.task_url, files=f, headers=self._client._headers())
67
- r.raise_for_status()
68
- updated = TaskResponseAsync(**r.json()).with_client(self._client)
69
- self.__dict__.update(updated.__dict__)
70
- return await self.poll()
71
-
72
- async def cancel(self):
73
- if not self.task_url:
74
- raise ValueError("Task URL not found")
75
- r = await self._client._client.get(f"{self.task_url}/cancel", headers=self._client._headers())
76
- r.raise_for_status()
77
- return await self.poll()
78
-
79
- async def delete(self):
80
- r = await self._client._client.delete(self.task_url, headers=self._client._headers())
81
- r.raise_for_status()
82
-
83
- def html(self) -> str:
84
- return self._get_content("html")
85
-
86
- def markdown(self) -> str:
87
- return self._get_content("markdown")
88
-
89
- def content(self) -> str:
90
- return self._get_content("content")
91
-
92
- def _get_content(self, t: str) -> str:
93
- if not self.output:
94
- return ""
95
- parts = []
96
- for c in self.output.chunks:
97
- for s in c.segments:
98
- v = getattr(s, t)
99
- if v:
100
- parts.append(v)
101
- return "\n".join(parts)
102
-
103
- # Satisfying TaskBase abstract methods with stubs
@@ -1,31 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from .config import Configuration
3
-
4
- class TaskBase(ABC):
5
- @abstractmethod
6
- def poll(self):
7
- pass
8
-
9
- @abstractmethod
10
- def update(self, config: Configuration):
11
- pass
12
-
13
- @abstractmethod
14
- def cancel(self):
15
- pass
16
-
17
- @abstractmethod
18
- def delete(self):
19
- pass
20
-
21
- @abstractmethod
22
- def html(self) -> str:
23
- pass
24
-
25
- @abstractmethod
26
- def markdown(self) -> str:
27
- pass
28
-
29
- @abstractmethod
30
- def content(self) -> str:
31
- pass
File without changes
File without changes
File without changes