PyPI - chunkr-ai - Versions diffs - 0.0.48__tar.gz → 0.0.49__tar.gz - Mend

chunkr-ai 0.0.48.tar.gz → 0.0.49.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

{chunkr_ai-0.0.48/src/chunkr_ai.egg-info → chunkr_ai-0.0.49}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunkr-ai
-Version: 0.0.48
+Version: 0.0.49
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License

{chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "chunkr-ai"
-version = "0.0.48"
+version = "0.0.49"
 authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
 description = "Python client for Chunkr: open source document intelligence"
 readme = "README.md"

{chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/configuration.py RENAMED Viewed

@@ -23,6 +23,7 @@ class GenerationConfig(BaseModel):
     markdown: Optional[GenerationStrategy] = None
     crop_image: Optional[CroppingStrategy] = None
     embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
+    extended_context: Optional[bool] = None
 class SegmentProcessing(BaseModel):
     model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)

{chunkr_ai-0.0.48 → chunkr_ai-0.0.49/src/chunkr_ai.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunkr-ai
-Version: 0.0.48
+Version: 0.0.49
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License

{chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/tests/test_chunkr.py RENAMED Viewed

@@ -2,27 +2,25 @@ import pytest
 from pathlib import Path
 from PIL import Image
 import asyncio
-import base64
-import io
-import tempfile
 from typing import Awaitable
 from chunkr_ai import Chunkr
 from chunkr_ai.models import (
+    ChunkProcessing,
     Configuration,
-    GenerationStrategy,
+    EmbedSource,
+    ErrorHandlingStrategy,
+    FallbackStrategy,
     GenerationConfig,
+    GenerationStrategy,
+    LlmProcessing,
     OcrStrategy,
     Pipeline,
     SegmentationStrategy,
     SegmentProcessing,
-    ChunkProcessing,
+    Status,
     TaskResponse,
-    EmbedSource,
-    ErrorHandlingStrategy,
     Tokenizer,
-    LlmProcessing,
-    FallbackStrategy,
 )
 @pytest.fixture
@@ -167,6 +165,21 @@ def model_fallback_config():
         ),
     )
+@pytest.fixture
+def extended_context_config():
+    return Configuration(
+        segment_processing=SegmentProcessing(
+            picture=GenerationConfig(
+                extended_context=True,
+                html=GenerationStrategy.LLM,
+            ),
+            table=GenerationConfig(
+                extended_context=True,
+                html=GenerationStrategy.LLM,
+            )
+        ),
+    )
 @pytest.mark.asyncio
 async def test_ocr_auto(client, sample_path):
     response = await client.upload(sample_path, Configuration(ocr_strategy=OcrStrategy.AUTO))
@@ -265,9 +278,18 @@ async def test_cancel_task(client, sample_path):
 @pytest.mark.asyncio
 async def test_cancel_task_direct(client, sample_path):
     task = await client.create_task(sample_path)
-    assert isinstance(task, Awaitable) and isinstance(task, TaskResponse)
     assert task.status == "Starting"
-    await task.cancel()
+    try:
+        await task.cancel()
+    except Exception as e:
+        task = await client.get_task(task.task_id)
+        print(task.status)
+        if task.status == Status.PROCESSING:
+            print("Task is processing, so it can't be cancelled")
+            assert True
+        else:
+            print("Task status:", task.status)
+            raise e
     assert task.status == "Cancelled"
 @pytest.mark.asyncio
@@ -304,6 +326,7 @@ async def test_pipeline_type_azure(client, sample_path):
     assert response.task_id is not None
     assert response.status == "Succeeded"
     assert response.output is not None
+    assert response.configuration.pipeline == Pipeline.AZURE
 @pytest.mark.asyncio
 async def test_pipeline_type_chunkr(client, sample_path):
@@ -311,7 +334,8 @@ async def test_pipeline_type_chunkr(client, sample_path):
     assert response.task_id is not None
     assert response.status == "Succeeded"
     assert response.output is not None
+    assert response.configuration.pipeline == Pipeline.CHUNKR
 @pytest.mark.asyncio
 async def test_client_lifecycle(client, sample_path):
     response1 = await client.upload(sample_path)
@@ -528,4 +552,33 @@ async def test_fallback_strategy_serialization():
     # Test string representation
     assert str(none_strategy) == "None"
     assert str(default_strategy) == "Default"
-    assert str(model_strategy) == "Model(gpt-4.1)"
+    assert str(model_strategy) == "Model(gpt-4.1)"
+@pytest.mark.asyncio
+async def test_extended_context(client, sample_path, extended_context_config):
+    """Tests uploading with extended context enabled for pictures and tables."""
+    print("\nTesting extended context for Pictures and Tables...")
+    try:
+        task = await client.upload(sample_path, config=extended_context_config)
+        print(f"Task created with extended context config: {task.task_id}")
+        print(f"Initial Status: {task.status}")
+        # Poll the task until it finishes or fails
+        print(f"Final Status: {task.status}")
+        print(f"Message: {task.message}")
+        # Basic assertion: Check if the task completed (either succeeded or failed)
+        assert task.status in [Status.SUCCEEDED, Status.FAILED], f"Task ended in unexpected state: {task.status}"
+        # More specific assertions based on expected outcomes with your local server
+        # if task.status == Status.FAILED:
+        #     assert "context_length_exceeded" in task.message, "Expected context length error"
+        # elif task.status == Status.SUCCEEDED:
+        #     # Check if output reflects extended context usage if possible
+        #     pass
+        print("Extended context test completed.")
+    except Exception as e:
+        print(f"Error during extended context test: {e}")
+        raise # Re-raise the exception to fail the test explicitly