chunkr-ai 0.0.37__tar.gz → 0.0.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {chunkr_ai-0.0.37/src/chunkr_ai.egg-info → chunkr_ai-0.0.39}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/configuration.py +11 -11
  4. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/task_response.py +3 -3
  5. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  6. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/tests/test_chunkr.py +12 -2
  7. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/LICENSE +0 -0
  8. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/README.md +0 -0
  9. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/setup.cfg +0 -0
  10. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/__init__.py +0 -0
  11. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/__init__.py +0 -0
  12. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/auth.py +0 -0
  13. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/chunkr.py +0 -0
  14. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/chunkr_base.py +0 -0
  15. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/decorators.py +0 -0
  16. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/misc.py +0 -0
  17. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/protocol.py +0 -0
  18. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/models.py +0 -0
  19. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.37
3
+ Version: 0.0.39
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.37"
7
+ version = "0.0.39"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -19,20 +19,18 @@ class GenerationConfig(BaseModel):
19
19
  class SegmentProcessing(BaseModel):
20
20
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
21
21
 
22
- title: Optional[GenerationConfig] = Field(default=None, alias="Title")
23
- section_header: Optional[GenerationConfig] = Field(
24
- default=None, alias="SectionHeader"
25
- )
26
- text: Optional[GenerationConfig] = Field(default=None, alias="Text")
27
- list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
28
- table: Optional[GenerationConfig] = Field(default=None, alias="Table")
29
- picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
30
22
  caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
31
- formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
32
23
  footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
33
- page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
34
- page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
24
+ formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
25
+ list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
35
26
  page: Optional[GenerationConfig] = Field(default=None, alias="Page")
27
+ page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
28
+ page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
29
+ picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
30
+ section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
31
+ table: Optional[GenerationConfig] = Field(default=None, alias="Table")
32
+ text: Optional[GenerationConfig] = Field(default=None, alias="Text")
33
+ title: Optional[GenerationConfig] = Field(default=None, alias="Title")
36
34
 
37
35
  class ChunkProcessing(BaseModel):
38
36
  ignore_headers_and_footers: Optional[bool] = None
@@ -84,11 +82,13 @@ class Segment(BaseModel):
84
82
  page_width: float
85
83
  segment_id: str
86
84
  segment_type: SegmentType
85
+ confidence: Optional[float]
87
86
 
88
87
  class Chunk(BaseModel):
89
88
  chunk_id: str
90
89
  chunk_length: int
91
90
  segments: List[Segment]
91
+ embed: Optional[str] = None
92
92
 
93
93
  class OutputResponse(BaseModel):
94
94
  chunks: List[Chunk]
@@ -144,7 +144,7 @@ class TaskResponse(BaseModel, Generic[T]):
144
144
  Args:
145
145
  output_file (str, optional): Path to save the markdown content. Defaults to None.
146
146
  """
147
- content = self._get_content("markdown")
147
+ content = self._get_content("markdown", separator="\n\n")
148
148
  self._write_to_file(content, output_file)
149
149
  return content
150
150
 
@@ -168,7 +168,7 @@ class TaskResponse(BaseModel, Generic[T]):
168
168
  self._write_to_file(data, output_file, is_json=True)
169
169
  return data
170
170
 
171
- def _get_content(self, t: str) -> str:
171
+ def _get_content(self, t: str, separator: str = "\n") -> str:
172
172
  if not self.output:
173
173
  return ""
174
174
  parts = []
@@ -177,4 +177,4 @@ class TaskResponse(BaseModel, Generic[T]):
177
177
  v = getattr(s, t)
178
178
  if v:
179
179
  parts.append(v)
180
- return "\n".join(parts)
180
+ return separator.join(parts)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.37
3
+ Version: 0.0.39
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -247,13 +247,18 @@ async def test_send_base64_file_with_filename(client, sample_path):
247
247
 
248
248
  @pytest.mark.asyncio
249
249
  async def test_output_files_no_dir(client, sample_path, tmp_path):
250
- await client.upload(sample_path)
250
+ task = await client.upload(sample_path)
251
251
 
252
252
  html_file = tmp_path / "output.html"
253
253
  md_file = tmp_path / "output.md"
254
254
  content_file = tmp_path / "output.txt"
255
255
  json_file = tmp_path / "output.json"
256
256
 
257
+ task.html(html_file)
258
+ task.markdown(md_file)
259
+ task.content(content_file)
260
+ task.json(json_file)
261
+
257
262
  assert html_file.exists()
258
263
  assert md_file.exists()
259
264
  assert content_file.exists()
@@ -261,13 +266,18 @@ async def test_output_files_no_dir(client, sample_path, tmp_path):
261
266
 
262
267
  @pytest.mark.asyncio
263
268
  async def test_output_files_with_dirs(client, sample_path, tmp_path):
264
- await client.upload(sample_path)
269
+ task = await client.upload(sample_path)
265
270
 
266
271
  nested_dir = tmp_path / "nested" / "output" / "dir"
267
272
  html_file = nested_dir / "output.html"
268
273
  md_file = nested_dir / "output.md"
269
274
  content_file = nested_dir / "output.txt"
270
275
  json_file = nested_dir / "output.json"
276
+
277
+ task.html(html_file)
278
+ task.markdown(md_file)
279
+ task.content(content_file)
280
+ task.json(json_file)
271
281
 
272
282
  assert html_file.exists()
273
283
  assert md_file.exists()
File without changes
File without changes
File without changes