PyPI - chunkr-ai - Versions diffs - 0.0.4__tar.gz → 0.0.6__tar.gz - Mend

chunkr-ai 0.0.4 (tar.gz) → 0.0.6 (tar.gz)

Files changed (24) hide show

{chunkr_ai-0.0.4/src/chunkr_ai.egg-info → chunkr_ai-0.0.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: chunkr-ai
-Version: 0.0.4
+Version: 0.0.6
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: pillow>=11.1.0
 Requires-Dist: pydantic>=2.10.4
+Requires-Dist: pytest-asyncio>=0.25.2
 Requires-Dist: python-dotenv>=1.0.1
 Requires-Dist: requests>=2.32.3
 Provides-Extra: test
@@ -192,13 +193,3 @@ chunkr = Chunkr(
     url="https://api.chunkr.ai"
 )
 ```
-## Run tests
-```python
-# Install dependencies
-uv pip install -e ".[test]"
-# Run tests
-uv run pytest
-```

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/README.md RENAMED Viewed

@@ -174,14 +174,4 @@ chunkr = Chunkr(
     api_key="your-api-key",
     url="https://api.chunkr.ai"
 )
-```
-## Run tests
-```python
-# Install dependencies
-uv pip install -e ".[test]"
-# Run tests
-uv run pytest
-```
+```

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "chunkr-ai"
-version = "0.0.4"
+version = "0.0.6"
 authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
 description = "Python client for Chunkr: open source document intelligence"
 readme = "README.md"
@@ -14,6 +14,7 @@ dependencies = [
     "httpx>=0.28.1",
     "pillow>=11.1.0",
     "pydantic>=2.10.4",
+    "pytest-asyncio>=0.25.2",
     "python-dotenv>=1.0.1",
     "requests>=2.32.3",
 ]
@@ -23,3 +24,4 @@ test = [
     "pytest>=8.3.4",
     "pytest-xdist>=3.6.1",
 ]

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/task.py RENAMED Viewed

@@ -24,6 +24,7 @@ class TaskResponse(BaseModel):
     output: Optional[OutputResponse]
     page_count: Optional[int]
     pdf_url: Optional[str]
+    started_at: Optional[datetime]
     status: Status
     task_id: str
     task_url: Optional[str]
@@ -57,8 +58,9 @@ class TaskResponse(BaseModel):
         while True:
             try:
                 r = await self._client._client.get(self.task_url, headers=self._client._headers())
-                await r.raise_for_status()
-                return await r.json()
+                r.raise_for_status()
+                response = r.json()
+                return response
             except (ConnectionError, TimeoutError) as _:
                 print("Connection error while polling the task, retrying...")
                 await asyncio.sleep(0.5)
@@ -117,15 +119,4 @@ class TaskResponse(BaseModel):
     def content(self) -> str:
         """Get full text for the task"""
-        return self._get_content("content")
-class TaskPayload(BaseModel):
-    current_configuration: Configuration
-    file_name: str
-    image_folder_location: str
-    input_location: str
-    output_location: str
-    pdf_location: str
-    previous_configuration: Optional[Configuration]
-    task_id: str
-    user_id: str
+        return self._get_content("content")

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/models.py RENAMED Viewed

@@ -20,7 +20,7 @@ from .api.config import (
     SegmentationStrategy,
 )
-from .api.task import TaskResponse, TaskPayload, Status
+from .api.task import TaskResponse, Status
 __all__ = [
     'BoundingBox',
@@ -43,6 +43,5 @@ __all__ = [
     'SegmentType',
     'SegmentationStrategy',
     'Status',
-    'TaskPayload',
     'TaskResponse'
 ]

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6/src/chunkr_ai.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: chunkr-ai
-Version: 0.0.4
+Version: 0.0.6
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 Project-URL: Homepage, https://chunkr.ai
@@ -9,6 +9,7 @@ License-File: LICENSE
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: pillow>=11.1.0
 Requires-Dist: pydantic>=2.10.4
+Requires-Dist: pytest-asyncio>=0.25.2
 Requires-Dist: python-dotenv>=1.0.1
 Requires-Dist: requests>=2.32.3
 Provides-Extra: test
@@ -192,13 +193,3 @@ chunkr = Chunkr(
     url="https://api.chunkr.ai"
 )
 ```
-## Run tests
-```python
-# Install dependencies
-uv pip install -e ".[test]"
-# Run tests
-uv run pytest
-```

{chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,7 @@
 httpx>=0.28.1
 pillow>=11.1.0
 pydantic>=2.10.4
+pytest-asyncio>=0.25.2
 python-dotenv>=1.0.1
 requests>=2.32.3

chunkr_ai-0.0.6/tests/test_chunkr.py ADDED Viewed

@@ -0,0 +1,212 @@
+import pytest
+import pytest_asyncio
+from pathlib import Path
+from PIL import Image
+from chunkr_ai import Chunkr, ChunkrAsync
+from chunkr_ai.models import (
+    ChunkProcessing,
+    Configuration,
+    GenerationStrategy,
+    GenerationConfig,
+    JsonSchema,
+    OcrStrategy,
+    Property,
+    SegmentationStrategy,
+    SegmentProcessing,
+    TaskResponse,
+)
+@pytest.fixture(params=[
+    pytest.param(("sync", Chunkr()), id="sync"),
+    pytest.param(("async", ChunkrAsync()), id="async")
+])
+def chunkr_client(request):
+    return request.param
+@pytest.fixture
+def sample_path():
+    return Path("tests/files/test.pdf")
+@pytest.fixture
+def sample_image():
+    img = Image.open("tests/files/test.jpg")
+    return img
+@pytest.mark.asyncio
+async def test_send_file_path(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path) if client_type == "async" else client.upload(sample_path)
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_send_file_path_str(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(str(sample_path)) if client_type == "async" else client.upload(str(sample_path))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_send_opened_file(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    with open(sample_path, 'rb') as f:
+        response = await client.upload(f) if client_type == "async" else client.upload(f)
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_send_pil_image(chunkr_client, sample_image):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_image) if client_type == "async" else client.upload(sample_image)
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+@pytest.mark.asyncio
+async def test_ocr_auto(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        ocr_strategy=OcrStrategy.AUTO
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        ocr_strategy=OcrStrategy.AUTO
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_expires_in(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        expires_in=10
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        expires_in=10
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_chunk_processing(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        chunk_processing=ChunkProcessing(
+            target_length=1024
+        )
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        chunk_processing=ChunkProcessing(
+            target_length=1024
+        )
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_segmentation_strategy_page(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_page_llm_html(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE,
+        segment_processing=SegmentProcessing(
+            page=GenerationConfig(
+                html=GenerationStrategy.LLM
+            )
+        )
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE,
+        segment_processing=SegmentProcessing(
+            page=GenerationConfig(
+                html=GenerationStrategy.LLM
+            )
+        )
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_page_llm(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE,
+        segment_processing=SegmentProcessing(
+            page=GenerationConfig(
+                html=GenerationStrategy.LLM,
+                markdown=GenerationStrategy.LLM
+            )
+        )
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        segmentation_strategy=SegmentationStrategy.PAGE,
+        segment_processing=SegmentProcessing(
+            page=GenerationConfig(
+                html=GenerationStrategy.LLM,
+                markdown=GenerationStrategy.LLM
+            )
+        )
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None
+@pytest.mark.asyncio
+async def test_json_schema(chunkr_client, sample_path):
+    client_type, client = chunkr_client
+    response = await client.upload(sample_path, Configuration(
+        json_schema=JsonSchema(
+            title="Sales Data",
+            properties=[
+                Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
+                Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
+            ]
+        )
+    )) if client_type == "async" else client.upload(sample_path, Configuration(
+        json_schema=JsonSchema(
+            title="Sales Data",
+            properties=[
+                Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
+                Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
+            ]
+        )
+    ))
+    assert isinstance(response, TaskResponse)
+    assert response.task_id is not None
+    assert response.status == "Succeeded"
+    assert response.output is not None

chunkr_ai-0.0.4/tests/test_chunkr.py DELETED Viewed

@@ -1,158 +0,0 @@
-import pytest
-from pathlib import Path
-from PIL import Image
-from chunkr_ai import Chunkr, ChunkrAsync
-from chunkr_ai.models import (
-    ChunkProcessing,
-    Configuration,
-    GenerationStrategy,
-    GenerationConfig,
-    JsonSchema,
-    OcrStrategy,
-    Property,
-    SegmentationStrategy,
-    SegmentProcessing,
-    TaskResponse,
-)
-@pytest.fixture
-def chunkr():
-    return Chunkr()
-@pytest.fixture
-def async_chunkr():
-    return ChunkrAsync()
-@pytest.fixture
-def sample_path():
-    return Path("tests/files/test.pdf")
-@pytest.fixture
-def sample_image():
-    img = Image.open("tests/files/test.jpg")
-    return img
-def test_send_file_path(chunkr, sample_path):
-    response = chunkr.upload(sample_path)
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_send_file_path_str(chunkr, sample_path):
-    response = chunkr.upload(str(sample_path))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_send_opened_file(chunkr, sample_path):
-    with open(sample_path, 'rb') as f:
-        response = chunkr.upload(f)
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_send_pil_image(chunkr, sample_image):
-    response = chunkr.upload(sample_image)
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-def test_ocr_auto(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        ocr_strategy=OcrStrategy.AUTO
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_expires_in(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        expires_in=10
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_chunk_processing(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        chunk_processing=ChunkProcessing(
-            target_length=1024
-        )
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_segmentation_strategy_page(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        segmentation_strategy=SegmentationStrategy.PAGE
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_page_llm_html(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        segmentation_strategy=SegmentationStrategy.PAGE,
-        segment_processing=SegmentProcessing(
-            page=GenerationConfig(
-                html=GenerationStrategy.LLM
-            )
-        )
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_page_llm(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        segmentation_strategy=SegmentationStrategy.PAGE,
-        segment_processing=SegmentProcessing(
-            page=GenerationConfig(
-                html=GenerationStrategy.LLM,
-                markdown=GenerationStrategy.LLM
-            )
-        )
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-def test_json_schema(chunkr, sample_path):
-    response = chunkr.upload(sample_path, Configuration(
-        json_schema=JsonSchema(
-            title="Sales Data",
-            properties=[
-                Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
-                Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
-            ]
-        )
-    ))
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None
-async def test_async_send_file_path(async_chunkr, sample_path):
-    response = await async_chunkr.upload(sample_path)
-    assert isinstance(response, TaskResponse)
-    assert response.task_id is not None
-    assert response.status == "Succeeded"
-    assert response.output is not None