PyPI - chunkr-ai - Versions diffs - 0.0.37__tar.gz → 0.0.39__tar.gz - Mend

chunkr-ai 0.0.37.tar.gz → 0.0.39.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{chunkr_ai-0.0.37/src/chunkr_ai.egg-info → chunkr_ai-0.0.39}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: chunkr-ai
-Version: 0.0.37
+Version: 0.0.39
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License

{chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "chunkr-ai"
-version = "0.0.37"
+version = "0.0.39"
 authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
 description = "Python client for Chunkr: open source document intelligence"
 readme = "README.md"

{chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/configuration.py RENAMED Viewed

@@ -19,20 +19,18 @@ class GenerationConfig(BaseModel):
 class SegmentProcessing(BaseModel):
     model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
-    title: Optional[GenerationConfig] = Field(default=None, alias="Title")
-    section_header: Optional[GenerationConfig] = Field(
-        default=None, alias="SectionHeader"
-    )
-    text: Optional[GenerationConfig] = Field(default=None, alias="Text")
-    list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
-    table: Optional[GenerationConfig] = Field(default=None, alias="Table")
-    picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
     caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
-    formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
     footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
-    page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
-    page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
+    formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
+    list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
     page: Optional[GenerationConfig] = Field(default=None, alias="Page")
+    page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
+    page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
+    picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
+    section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
+    table: Optional[GenerationConfig] = Field(default=None, alias="Table")
+    text: Optional[GenerationConfig] = Field(default=None, alias="Text")
+    title: Optional[GenerationConfig] = Field(default=None, alias="Title")
 class ChunkProcessing(BaseModel):
     ignore_headers_and_footers: Optional[bool] = None
@@ -84,11 +82,13 @@ class Segment(BaseModel):
     page_width: float
     segment_id: str
     segment_type: SegmentType
+    confidence: Optional[float]
 class Chunk(BaseModel):
     chunk_id: str
     chunk_length: int
     segments: List[Segment]
+    embed: Optional[str] = None
 class OutputResponse(BaseModel):
     chunks: List[Chunk]

{chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/src/chunkr_ai/api/task_response.py RENAMED Viewed

@@ -144,7 +144,7 @@ class TaskResponse(BaseModel, Generic[T]):
         Args:
             output_file (str, optional): Path to save the markdown content. Defaults to None.
         """
-        content = self._get_content("markdown")
+        content = self._get_content("markdown", separator="\n\n")
         self._write_to_file(content, output_file)
         return content
@@ -168,7 +168,7 @@ class TaskResponse(BaseModel, Generic[T]):
         self._write_to_file(data, output_file, is_json=True)
         return data
-    def _get_content(self, t: str) -> str:
+    def _get_content(self, t: str, separator: str = "\n") -> str:
         if not self.output:
             return ""
         parts = []
@@ -177,4 +177,4 @@ class TaskResponse(BaseModel, Generic[T]):
                 v = getattr(s, t)
                 if v:
                     parts.append(v)
-        return "\n".join(parts)
+        return separator.join(parts)

{chunkr_ai-0.0.37 → chunkr_ai-0.0.39/src/chunkr_ai.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: chunkr-ai
-Version: 0.0.37
+Version: 0.0.39
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License

{chunkr_ai-0.0.37 → chunkr_ai-0.0.39}/tests/test_chunkr.py RENAMED Viewed

@@ -247,13 +247,18 @@ async def test_send_base64_file_with_filename(client, sample_path):
 @pytest.mark.asyncio
 async def test_output_files_no_dir(client, sample_path, tmp_path):
-    await client.upload(sample_path)
+    task = await client.upload(sample_path)
     html_file = tmp_path / "output.html"
     md_file = tmp_path / "output.md"
     content_file = tmp_path / "output.txt"
     json_file = tmp_path / "output.json"
+    task.html(html_file)
+    task.markdown(md_file)
+    task.content(content_file)
+    task.json(json_file)
     assert html_file.exists()
     assert md_file.exists()
     assert content_file.exists()
@@ -261,13 +266,18 @@ async def test_output_files_no_dir(client, sample_path, tmp_path):
 @pytest.mark.asyncio
 async def test_output_files_with_dirs(client, sample_path, tmp_path):
-    await client.upload(sample_path)
+    task = await client.upload(sample_path)
     nested_dir = tmp_path / "nested" / "output" / "dir"
     html_file = nested_dir / "output.html"
     md_file = nested_dir / "output.md"
     content_file = nested_dir / "output.txt"
     json_file = nested_dir / "output.json"
+    task.html(html_file)
+    task.markdown(md_file)
+    task.content(content_file)
+    task.json(json_file)
     assert html_file.exists()
     assert md_file.exists()