chunkr-ai 0.0.35__tar.gz → 0.0.36__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (22)
  1. {chunkr_ai-0.0.35/src/chunkr_ai.egg-info → chunkr_ai-0.0.36}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/task_response.py +27 -22
  4. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  5. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/tests/test_chunkr.py +30 -0
  6. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/LICENSE +0 -0
  7. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/README.md +0 -0
  8. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/setup.cfg +0 -0
  9. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/__init__.py +0 -0
  10. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/__init__.py +0 -0
  11. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/auth.py +0 -0
  12. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/chunkr.py +0 -0
  13. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/chunkr_base.py +0 -0
  14. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/configuration.py +0 -0
  15. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/decorators.py +0 -0
  16. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/misc.py +0 -0
  17. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/api/protocol.py +0 -0
  18. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai/models.py +0 -0
  19. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.35 → chunkr_ai-0.0.36}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.35
3
+ Version: 0.0.36
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.35"
7
+ version = "0.0.36"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -103,6 +103,29 @@ class TaskResponse(BaseModel, Generic[T]):
103
103
  r.raise_for_status()
104
104
  return await self.poll()
105
105
 
106
+ def _write_to_file(self, content: str | dict, output_file: str, is_json: bool = False) -> None:
107
+ """Helper method to write content to a file
108
+
109
+ Args:
110
+ content: Content to write (string or dict for JSON)
111
+ output_file: Path to save the content
112
+ is_json: Whether the content should be written as JSON
113
+ """
114
+ class DateTimeEncoder(json.JSONEncoder):
115
+ def default(self, obj):
116
+ if isinstance(obj, datetime):
117
+ return obj.isoformat()
118
+ return super().default(obj)
119
+ if output_file:
120
+ directory = os.path.dirname(output_file)
121
+ if directory:
122
+ os.makedirs(directory, exist_ok=True)
123
+ with open(output_file, "w", encoding="utf-8") as f:
124
+ if is_json:
125
+ json.dump(content, f, cls=DateTimeEncoder, indent=2)
126
+ else:
127
+ f.write(content)
128
+
106
129
  def html(self, output_file: str = None) -> str:
107
130
  """Get the full HTML of the task
108
131
 
@@ -110,10 +133,7 @@ class TaskResponse(BaseModel, Generic[T]):
110
133
  output_file (str, optional): Path to save the HTML content. Defaults to None.
111
134
  """
112
135
  content = self._get_content("html")
113
- if output_file:
114
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
115
- with open(output_file, "w", encoding="utf-8") as f:
116
- f.write(content)
136
+ self._write_to_file(content, output_file)
117
137
  return content
118
138
 
119
139
  def markdown(self, output_file: str = None) -> str:
@@ -123,10 +143,7 @@ class TaskResponse(BaseModel, Generic[T]):
123
143
  output_file (str, optional): Path to save the markdown content. Defaults to None.
124
144
  """
125
145
  content = self._get_content("markdown")
126
- if output_file:
127
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
128
- with open(output_file, "w", encoding="utf-8") as f:
129
- f.write(content)
146
+ self._write_to_file(content, output_file)
130
147
  return content
131
148
 
132
149
  def content(self, output_file: str = None) -> str:
@@ -136,10 +153,7 @@ class TaskResponse(BaseModel, Generic[T]):
136
153
  output_file (str, optional): Path to save the content. Defaults to None.
137
154
  """
138
155
  content = self._get_content("content")
139
- if output_file:
140
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
141
- with open(output_file, "w", encoding="utf-8") as f:
142
- f.write(content)
156
+ self._write_to_file(content, output_file)
143
157
  return content
144
158
 
145
159
  def json(self, output_file: str = None) -> dict:
@@ -148,17 +162,8 @@ class TaskResponse(BaseModel, Generic[T]):
148
162
  Args:
149
163
  output_file (str, optional): Path to save the task data as JSON. Defaults to None.
150
164
  """
151
- class DateTimeEncoder(json.JSONEncoder):
152
- def default(self, obj):
153
- if isinstance(obj, datetime):
154
- return obj.isoformat()
155
- return super().default(obj)
156
-
157
165
  data = self.model_dump()
158
- if output_file:
159
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
160
- with open(output_file, "w", encoding="utf-8") as f:
161
- json.dump(data, f, cls=DateTimeEncoder, indent=2)
166
+ self._write_to_file(data, output_file, is_json=True)
162
167
  return data
163
168
 
164
169
  def _get_content(self, t: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.35
3
+ Version: 0.0.36
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -67,6 +67,7 @@ async def test_send_pil_image(client, sample_image):
67
67
  response = await client.upload(sample_image)
68
68
  assert response.task_id is not None
69
69
  assert response.status == "Succeeded"
70
+ assert response.output is not None
70
71
 
71
72
  @pytest.mark.asyncio
72
73
  async def test_ocr_auto(client, sample_path):
@@ -220,3 +221,32 @@ async def test_task_operations_after_client_close(client, sample_path):
220
221
  await client.close()
221
222
  result = await task.poll()
222
223
  assert result.status == "Succeeded"
224
+
225
+ @pytest.mark.asyncio
226
+ async def test_output_files_no_dir(client, sample_path, tmp_path):
227
+ await client.upload(sample_path)
228
+
229
+ html_file = tmp_path / "output.html"
230
+ md_file = tmp_path / "output.md"
231
+ content_file = tmp_path / "output.txt"
232
+ json_file = tmp_path / "output.json"
233
+
234
+ assert html_file.exists()
235
+ assert md_file.exists()
236
+ assert content_file.exists()
237
+ assert json_file.exists()
238
+
239
+ @pytest.mark.asyncio
240
+ async def test_output_files_with_dirs(client, sample_path, tmp_path):
241
+ await client.upload(sample_path)
242
+
243
+ nested_dir = tmp_path / "nested" / "output" / "dir"
244
+ html_file = nested_dir / "output.html"
245
+ md_file = nested_dir / "output.md"
246
+ content_file = nested_dir / "output.txt"
247
+ json_file = nested_dir / "output.json"
248
+
249
+ assert html_file.exists()
250
+ assert md_file.exists()
251
+ assert content_file.exists()
252
+ assert json_file.exists()
File without changes
File without changes
File without changes