chunkr-ai 0.0.35__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.0.35/src/chunkr_ai.egg-info → chunkr_ai-0.0.36}/PKG-INFO +1 -1
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/pyproject.toml +1 -1
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/task_response.py +27 -22
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/tests/test_chunkr.py +30 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/LICENSE +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/README.md +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/setup.cfg +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/__init__.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/auth.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/chunkr_base.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/configuration.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/decorators.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/misc.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/models.py +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.35"
|
7
|
+
version = "0.0.36"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -103,6 +103,29 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
103
103
|
r.raise_for_status()
|
104
104
|
return await self.poll()
|
105
105
|
|
106
|
+
def _write_to_file(self, content: str | dict, output_file: str, is_json: bool = False) -> None:
|
107
|
+
"""Helper method to write content to a file
|
108
|
+
|
109
|
+
Args:
|
110
|
+
content: Content to write (string or dict for JSON)
|
111
|
+
output_file: Path to save the content
|
112
|
+
is_json: Whether the content should be written as JSON
|
113
|
+
"""
|
114
|
+
class DateTimeEncoder(json.JSONEncoder):
|
115
|
+
def default(self, obj):
|
116
|
+
if isinstance(obj, datetime):
|
117
|
+
return obj.isoformat()
|
118
|
+
return super().default(obj)
|
119
|
+
if output_file:
|
120
|
+
directory = os.path.dirname(output_file)
|
121
|
+
if directory:
|
122
|
+
os.makedirs(directory, exist_ok=True)
|
123
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
124
|
+
if is_json:
|
125
|
+
json.dump(content, f, cls=DateTimeEncoder, indent=2)
|
126
|
+
else:
|
127
|
+
f.write(content)
|
128
|
+
|
106
129
|
def html(self, output_file: str = None) -> str:
|
107
130
|
"""Get the full HTML of the task
|
108
131
|
|
@@ -110,10 +133,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
110
133
|
output_file (str, optional): Path to save the HTML content. Defaults to None.
|
111
134
|
"""
|
112
135
|
content = self._get_content("html")
|
113
|
-
|
114
|
-
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
115
|
-
with open(output_file, "w", encoding="utf-8") as f:
|
116
|
-
f.write(content)
|
136
|
+
self._write_to_file(content, output_file)
|
117
137
|
return content
|
118
138
|
|
119
139
|
def markdown(self, output_file: str = None) -> str:
|
@@ -123,10 +143,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
123
143
|
output_file (str, optional): Path to save the markdown content. Defaults to None.
|
124
144
|
"""
|
125
145
|
content = self._get_content("markdown")
|
126
|
-
|
127
|
-
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
128
|
-
with open(output_file, "w", encoding="utf-8") as f:
|
129
|
-
f.write(content)
|
146
|
+
self._write_to_file(content, output_file)
|
130
147
|
return content
|
131
148
|
|
132
149
|
def content(self, output_file: str = None) -> str:
|
@@ -136,10 +153,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
136
153
|
output_file (str, optional): Path to save the content. Defaults to None.
|
137
154
|
"""
|
138
155
|
content = self._get_content("content")
|
139
|
-
|
140
|
-
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
141
|
-
with open(output_file, "w", encoding="utf-8") as f:
|
142
|
-
f.write(content)
|
156
|
+
self._write_to_file(content, output_file)
|
143
157
|
return content
|
144
158
|
|
145
159
|
def json(self, output_file: str = None) -> dict:
|
@@ -148,17 +162,8 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
148
162
|
Args:
|
149
163
|
output_file (str, optional): Path to save the task data as JSON. Defaults to None.
|
150
164
|
"""
|
151
|
-
class DateTimeEncoder(json.JSONEncoder):
|
152
|
-
def default(self, obj):
|
153
|
-
if isinstance(obj, datetime):
|
154
|
-
return obj.isoformat()
|
155
|
-
return super().default(obj)
|
156
|
-
|
157
165
|
data = self.model_dump()
|
158
|
-
|
159
|
-
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
160
|
-
with open(output_file, "w", encoding="utf-8") as f:
|
161
|
-
json.dump(data, f, cls=DateTimeEncoder, indent=2)
|
166
|
+
self._write_to_file(data, output_file, is_json=True)
|
162
167
|
return data
|
163
168
|
|
164
169
|
def _get_content(self, t: str) -> str:
|
@@ -67,6 +67,7 @@ async def test_send_pil_image(client, sample_image):
|
|
67
67
|
response = await client.upload(sample_image)
|
68
68
|
assert response.task_id is not None
|
69
69
|
assert response.status == "Succeeded"
|
70
|
+
assert response.output is not None
|
70
71
|
|
71
72
|
@pytest.mark.asyncio
|
72
73
|
async def test_ocr_auto(client, sample_path):
|
@@ -220,3 +221,32 @@ async def test_task_operations_after_client_close(client, sample_path):
|
|
220
221
|
await client.close()
|
221
222
|
result = await task.poll()
|
222
223
|
assert result.status == "Succeeded"
|
224
|
+
|
225
|
+
@pytest.mark.asyncio
|
226
|
+
async def test_output_files_no_dir(client, sample_path, tmp_path):
|
227
|
+
await client.upload(sample_path)
|
228
|
+
|
229
|
+
html_file = tmp_path / "output.html"
|
230
|
+
md_file = tmp_path / "output.md"
|
231
|
+
content_file = tmp_path / "output.txt"
|
232
|
+
json_file = tmp_path / "output.json"
|
233
|
+
|
234
|
+
assert html_file.exists()
|
235
|
+
assert md_file.exists()
|
236
|
+
assert content_file.exists()
|
237
|
+
assert json_file.exists()
|
238
|
+
|
239
|
+
@pytest.mark.asyncio
|
240
|
+
async def test_output_files_with_dirs(client, sample_path, tmp_path):
|
241
|
+
await client.upload(sample_path)
|
242
|
+
|
243
|
+
nested_dir = tmp_path / "nested" / "output" / "dir"
|
244
|
+
html_file = nested_dir / "output.html"
|
245
|
+
md_file = nested_dir / "output.md"
|
246
|
+
content_file = nested_dir / "output.txt"
|
247
|
+
json_file = nested_dir / "output.json"
|
248
|
+
|
249
|
+
assert html_file.exists()
|
250
|
+
assert md_file.exists()
|
251
|
+
assert content_file.exists()
|
252
|
+
assert json_file.exists()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|