chunkr-ai 0.0.7__tar.gz → 0.0.9__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27)
  1. {chunkr_ai-0.0.7/src/chunkr_ai.egg-info → chunkr_ai-0.0.9}/PKG-INFO +9 -9
  2. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/pyproject.toml +9 -9
  3. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/config.py +5 -5
  4. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/task.py +9 -9
  5. chunkr_ai-0.0.9/src/chunkr_ai/api/task_async.py +111 -0
  6. chunkr_ai-0.0.9/src/chunkr_ai/api/task_base.py +31 -0
  7. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9/src/chunkr_ai.egg-info}/PKG-INFO +9 -9
  8. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai.egg-info/SOURCES.txt +2 -0
  9. chunkr_ai-0.0.9/src/chunkr_ai.egg-info/requires.txt +10 -0
  10. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/tests/test_chunkr.py +3 -3
  11. chunkr_ai-0.0.7/src/chunkr_ai.egg-info/requires.txt +0 -10
  12. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/LICENSE +0 -0
  13. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/README.md +0 -0
  14. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/setup.cfg +0 -0
  15. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/__init__.py +0 -0
  16. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/__init__.py +0 -0
  17. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/api.py +0 -0
  18. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/auth.py +0 -0
  19. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/base.py +0 -0
  20. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/chunkr.py +0 -0
  21. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/chunkr_async.py +0 -0
  22. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/misc.py +0 -0
  23. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/api/protocol.py +0 -0
  24. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/main.py +0 -0
  25. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai/models.py +0 -0
  26. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  27. {chunkr_ai-0.0.7 → chunkr_ai-0.0.9}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,20 +1,20 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: httpx>=0.28.1
10
- Requires-Dist: pillow>=11.1.0
11
- Requires-Dist: pydantic>=2.10.4
12
- Requires-Dist: pytest-asyncio>=0.25.2
13
- Requires-Dist: python-dotenv>=1.0.1
14
- Requires-Dist: requests>=2.32.3
9
+ Requires-Dist: httpx>=0.24.0
10
+ Requires-Dist: pillow>=10.0.0
11
+ Requires-Dist: pydantic>=2.0.0
12
+ Requires-Dist: pytest-asyncio>=0.21.0
13
+ Requires-Dist: python-dotenv>=0.19.0
14
+ Requires-Dist: requests>=2.28.0
15
15
  Provides-Extra: test
16
- Requires-Dist: pytest>=8.3.4; extra == "test"
17
- Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
16
+ Requires-Dist: pytest>=7.0.0; extra == "test"
17
+ Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
18
18
 
19
19
  # Chunkr Python Client
20
20
 
@@ -4,24 +4,24 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.7"
7
+ version = "0.0.9"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
11
11
  license = {"file" = "LICENSE"}
12
12
  urls = {Homepage = "https://chunkr.ai"}
13
13
  dependencies = [
14
- "httpx>=0.28.1",
15
- "pillow>=11.1.0",
16
- "pydantic>=2.10.4",
17
- "pytest-asyncio>=0.25.2",
18
- "python-dotenv>=1.0.1",
19
- "requests>=2.32.3",
14
+ "httpx>=0.24.0",
15
+ "pillow>=10.0.0",
16
+ "pydantic>=2.0.0",
17
+ "pytest-asyncio>=0.21.0",
18
+ "python-dotenv>=0.19.0",
19
+ "requests>=2.28.0",
20
20
  ]
21
21
 
22
22
  [project.optional-dependencies]
23
23
  test = [
24
- "pytest>=8.3.4",
25
- "pytest-xdist>=3.6.1",
24
+ "pytest>=7.0.0",
25
+ "pytest-xdist>=3.0.0",
26
26
  ]
27
27
 
@@ -86,9 +86,9 @@ class Segment(BaseModel):
86
86
  bbox: BoundingBox
87
87
  content: str
88
88
  page_height: float
89
- html: Optional[str]
90
- image: Optional[str]
91
- markdown: Optional[str]
89
+ html: Optional[str] = None
90
+ image: Optional[str] = None
91
+ markdown: Optional[str] = None
92
92
  ocr: List[OCRResult]
93
93
  page_number: int
94
94
  page_width: float
@@ -104,8 +104,8 @@ class ExtractedJson(BaseModel):
104
104
  data: Dict
105
105
 
106
106
  class OutputResponse(BaseModel):
107
- chunks: List[Chunk] = []
108
- extracted_json: Optional[ExtractedJson]
107
+ chunks: List[Chunk]
108
+ extracted_json: Optional[ExtractedJson] = Field(default=None)
109
109
 
110
110
  class Model(str, Enum):
111
111
  FAST = "Fast"
@@ -18,18 +18,18 @@ class Status(str, Enum):
18
18
  class TaskResponse(BaseModel):
19
19
  configuration: Configuration
20
20
  created_at: datetime
21
- expires_at: Optional[datetime]
22
- file_name: Optional[str]
23
- finished_at: Optional[datetime]
24
- input_file_url: Optional[str]
21
+ expires_at: Optional[datetime] = None
22
+ file_name: Optional[str] = None
23
+ finished_at: Optional[datetime] = None
24
+ input_file_url: Optional[str] = None
25
25
  message: str
26
- output: Optional[OutputResponse]
27
- page_count: Optional[int]
28
- pdf_url: Optional[str]
29
- started_at: Optional[datetime]
26
+ output: Optional[OutputResponse] = None
27
+ page_count: Optional[int] = None
28
+ pdf_url: Optional[str] = None
29
+ started_at: Optional[datetime] = None
30
30
  status: Status
31
31
  task_id: str
32
- task_url: Optional[str]
32
+ task_url: Optional[str] = None
33
33
  _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
34
34
 
35
35
  def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponse':
@@ -0,0 +1,111 @@
1
+ import asyncio
2
+ from pydantic import BaseModel, PrivateAttr
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from typing import Optional, Union
6
+ from .task_base import TaskBase
7
+ from .protocol import ChunkrClientProtocol
8
+ from .config import Configuration, OutputResponse
9
+ from .misc import prepare_upload_data
10
+
11
+ class Status(str, Enum):
12
+ STARTING = "Starting"
13
+ PROCESSING = "Processing"
14
+ SUCCEEDED = "Succeeded"
15
+ FAILED = "Failed"
16
+ CANCELLED = "Cancelled"
17
+
18
+ class TaskResponseAsync(BaseModel, TaskBase):
19
+ configuration: Configuration
20
+ created_at: datetime
21
+ expires_at: Optional[datetime]
22
+ file_name: Optional[str]
23
+ finished_at: Optional[datetime]
24
+ input_file_url: Optional[str]
25
+ message: str
26
+ output: Optional[OutputResponse]
27
+ page_count: Optional[int]
28
+ pdf_url: Optional[str]
29
+ started_at: Optional[datetime]
30
+ status: Status
31
+ task_id: str
32
+ task_url: Optional[str]
33
+ _client: Optional[Union[ChunkrClientProtocol]] = PrivateAttr(default=None)
34
+
35
+ def with_client(self, client: Union[ChunkrClientProtocol]) -> 'TaskResponseAsync':
36
+ self._client = client
37
+ return self
38
+
39
+ async def poll(self) -> 'TaskResponseAsync':
40
+ while True:
41
+ j = await self._poll_request()
42
+ updated = TaskResponseAsync(**j).with_client(self._client)
43
+ self.__dict__.update(updated.__dict__)
44
+ if res := self._check_status():
45
+ return res
46
+ await asyncio.sleep(0.5)
47
+
48
+ async def _poll_request(self) -> dict:
49
+ if not self.task_url:
50
+ raise ValueError("Task URL not found")
51
+ while True:
52
+ try:
53
+ r = await self._client._client.get(self.task_url, headers=self._client._headers())
54
+ r.raise_for_status()
55
+ return r.json()
56
+ except Exception as e:
57
+ if self.status == Status.FAILED:
58
+ raise ValueError(self.message) from e
59
+ await asyncio.sleep(0.5)
60
+
61
+ def _check_status(self) -> Optional['TaskResponseAsync']:
62
+ if self.status == Status.FAILED:
63
+ raise ValueError(f"Task failed: {self.message}")
64
+ if self.status == Status.CANCELLED:
65
+ return self
66
+ if self.status not in [Status.STARTING, Status.PROCESSING]:
67
+ return self
68
+ return None
69
+
70
+ async def update(self, config: Configuration) -> 'TaskResponseAsync':
71
+ if not self.task_url:
72
+ raise ValueError("Task URL not found")
73
+ f = prepare_upload_data(None, config)
74
+ r = await self._client._client.patch(self.task_url, files=f, headers=self._client._headers())
75
+ r.raise_for_status()
76
+ updated = TaskResponseAsync(**r.json()).with_client(self._client)
77
+ self.__dict__.update(updated.__dict__)
78
+ return await self.poll()
79
+
80
+ async def cancel(self):
81
+ if not self.task_url:
82
+ raise ValueError("Task URL not found")
83
+ r = await self._client._client.get(f"{self.task_url}/cancel", headers=self._client._headers())
84
+ r.raise_for_status()
85
+ return await self.poll()
86
+
87
+ async def delete(self):
88
+ r = await self._client._client.delete(self.task_url, headers=self._client._headers())
89
+ r.raise_for_status()
90
+
91
+ def html(self) -> str:
92
+ return self._get_content("html")
93
+
94
+ def markdown(self) -> str:
95
+ return self._get_content("markdown")
96
+
97
+ def content(self) -> str:
98
+ return self._get_content("content")
99
+
100
+ def _get_content(self, t: str) -> str:
101
+ if not self.output:
102
+ return ""
103
+ parts = []
104
+ for c in self.output.chunks:
105
+ for s in c.segments:
106
+ v = getattr(s, t)
107
+ if v:
108
+ parts.append(v)
109
+ return "\n".join(parts)
110
+
111
+ # Satisfying TaskBase abstract methods with stubs
@@ -0,0 +1,31 @@
1
+ from abc import ABC, abstractmethod
2
+ from .config import Configuration
3
+
4
+ class TaskBase(ABC):
5
+ @abstractmethod
6
+ def poll(self):
7
+ pass
8
+
9
+ @abstractmethod
10
+ def update(self, config: Configuration):
11
+ pass
12
+
13
+ @abstractmethod
14
+ def cancel(self):
15
+ pass
16
+
17
+ @abstractmethod
18
+ def delete(self):
19
+ pass
20
+
21
+ @abstractmethod
22
+ def html(self) -> str:
23
+ pass
24
+
25
+ @abstractmethod
26
+ def markdown(self) -> str:
27
+ pass
28
+
29
+ @abstractmethod
30
+ def content(self) -> str:
31
+ pass
@@ -1,20 +1,20 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: httpx>=0.28.1
10
- Requires-Dist: pillow>=11.1.0
11
- Requires-Dist: pydantic>=2.10.4
12
- Requires-Dist: pytest-asyncio>=0.25.2
13
- Requires-Dist: python-dotenv>=1.0.1
14
- Requires-Dist: requests>=2.32.3
9
+ Requires-Dist: httpx>=0.24.0
10
+ Requires-Dist: pillow>=10.0.0
11
+ Requires-Dist: pydantic>=2.0.0
12
+ Requires-Dist: pytest-asyncio>=0.21.0
13
+ Requires-Dist: python-dotenv>=0.19.0
14
+ Requires-Dist: requests>=2.28.0
15
15
  Provides-Extra: test
16
- Requires-Dist: pytest>=8.3.4; extra == "test"
17
- Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
16
+ Requires-Dist: pytest>=7.0.0; extra == "test"
17
+ Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
18
18
 
19
19
  # Chunkr Python Client
20
20
 
@@ -19,4 +19,6 @@ src/chunkr_ai/api/config.py
19
19
  src/chunkr_ai/api/misc.py
20
20
  src/chunkr_ai/api/protocol.py
21
21
  src/chunkr_ai/api/task.py
22
+ src/chunkr_ai/api/task_async.py
23
+ src/chunkr_ai/api/task_base.py
22
24
  tests/test_chunkr.py
@@ -0,0 +1,10 @@
1
+ httpx>=0.24.0
2
+ pillow>=10.0.0
3
+ pydantic>=2.0.0
4
+ pytest-asyncio>=0.21.0
5
+ python-dotenv>=0.19.0
6
+ requests>=2.28.0
7
+
8
+ [test]
9
+ pytest>=7.0.0
10
+ pytest-xdist>=3.0.0
@@ -19,7 +19,7 @@ from chunkr_ai.models import (
19
19
 
20
20
  @pytest.fixture(params=[
21
21
  pytest.param(("sync", Chunkr()), id="sync"),
22
- # pytest.param(("async", ChunkrAsync()), id="async")
22
+ pytest.param(("async", ChunkrAsync()), id="async")
23
23
  ])
24
24
  def chunkr_client(request):
25
25
  return request.param
@@ -206,9 +206,9 @@ async def test_json_schema(chunkr_client, sample_path):
206
206
 
207
207
  assert isinstance(response, TaskResponse)
208
208
  assert response.task_id is not None
209
- assert response.status == "Succeeded"
209
+ if response.status != "Succeeded":
210
+ raise ValueError(f"Task failed with message: {response.message}")
210
211
  assert response.output is not None
211
-
212
212
  @pytest.mark.asyncio
213
213
  async def test_delete_task(chunkr_client, sample_path):
214
214
  client_type, client = chunkr_client
@@ -1,10 +0,0 @@
1
- httpx>=0.28.1
2
- pillow>=11.1.0
3
- pydantic>=2.10.4
4
- pytest-asyncio>=0.25.2
5
- python-dotenv>=1.0.1
6
- requests>=2.32.3
7
-
8
- [test]
9
- pytest>=8.3.4
10
- pytest-xdist>=3.6.1
File without changes
File without changes
File without changes