chunkr-ai 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/config.py +12 -11
- chunkr_ai/api/task.py +8 -1
- chunkr_ai/models.py +1 -2
- {chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/METADATA +82 -12
- {chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/RECORD +8 -8
- {chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/top_level.txt +0 -0
    
        chunkr_ai/api/config.py
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            from pydantic import BaseModel, Field
         | 
| 1 | 
            +
            from pydantic import BaseModel, Field, model_validator
         | 
| 2 2 | 
             
            from enum import Enum
         | 
| 3 3 | 
             
            from typing import Optional, List, Dict
         | 
| 4 4 |  | 
| @@ -40,15 +40,14 @@ class ChunkProcessing(BaseModel): | |
| 40 40 |  | 
| 41 41 | 
             
            class Property(BaseModel):
         | 
| 42 42 | 
             
                name: str
         | 
| 43 | 
            -
                title: Optional[str]
         | 
| 43 | 
            +
                title: Optional[str] = None
         | 
| 44 44 | 
             
                prop_type: str
         | 
| 45 | 
            -
                description: Optional[str]
         | 
| 46 | 
            -
                default: Optional[str]
         | 
| 45 | 
            +
                description: Optional[str] = None
         | 
| 46 | 
            +
                default: Optional[str] = None
         | 
| 47 47 |  | 
| 48 48 | 
             
            class JsonSchema(BaseModel):
         | 
| 49 49 | 
             
                title: str
         | 
| 50 50 | 
             
                properties: List[Property]
         | 
| 51 | 
            -
                schema_type: Optional[str]
         | 
| 52 51 |  | 
| 53 52 | 
             
            class OcrStrategy(str, Enum):
         | 
| 54 53 | 
             
                ALL = "All"
         | 
| @@ -121,10 +120,12 @@ class Configuration(BaseModel): | |
| 121 120 | 
             
                ocr_strategy: Optional[OcrStrategy] = Field(default=None)
         | 
| 122 121 | 
             
                segment_processing: Optional[SegmentProcessing] = Field(default=None)
         | 
| 123 122 | 
             
                segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
         | 
| 124 | 
            -
                target_chunk_length: Optional[int] = Field(default=None)
         | 
| 125 123 |  | 
| 126 | 
            -
             | 
| 127 | 
            -
                 | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 124 | 
            +
                @model_validator(mode='before')
         | 
| 125 | 
            +
                def map_deprecated_fields(cls, values: Dict) -> Dict:
         | 
| 126 | 
            +
                    if isinstance(values, dict) and "target_chunk_length" in values:
         | 
| 127 | 
            +
                        target_length = values.pop("target_chunk_length")
         | 
| 128 | 
            +
                        if target_length is not None:
         | 
| 129 | 
            +
                            values["chunk_processing"] = values.get("chunk_processing", {}) or {}
         | 
| 130 | 
            +
                            values["chunk_processing"]["target_length"] = target_length
         | 
| 131 | 
            +
                    return values
         | 
    
        chunkr_ai/api/task.py
    CHANGED
    
    | @@ -1,11 +1,18 @@ | |
| 1 1 | 
             
            from .protocol import ChunkrClientProtocol
         | 
| 2 | 
            -
            from .config import Configuration,  | 
| 2 | 
            +
            from .config import Configuration, OutputResponse
         | 
| 3 3 | 
             
            import asyncio
         | 
| 4 4 | 
             
            from datetime import datetime
         | 
| 5 | 
            +
            from enum import Enum
         | 
| 5 6 | 
             
            from pydantic import BaseModel, PrivateAttr
         | 
| 6 7 | 
             
            import time
         | 
| 7 8 | 
             
            from typing import Optional, Union
         | 
| 8 9 |  | 
| 10 | 
            +
            class Status(str, Enum):
         | 
| 11 | 
            +
                STARTING = "Starting"
         | 
| 12 | 
            +
                PROCESSING = "Processing"
         | 
| 13 | 
            +
                SUCCEEDED = "Succeeded"
         | 
| 14 | 
            +
                FAILED = "Failed"
         | 
| 15 | 
            +
                
         | 
| 9 16 | 
             
            class TaskResponse(BaseModel):
         | 
| 10 17 | 
             
                configuration: Configuration
         | 
| 11 18 | 
             
                created_at: datetime
         | 
    
        chunkr_ai/models.py
    CHANGED
    
    | @@ -18,10 +18,9 @@ from .api.config import ( | |
| 18 18 | 
             
                SegmentProcessing,
         | 
| 19 19 | 
             
                SegmentType,
         | 
| 20 20 | 
             
                SegmentationStrategy,
         | 
| 21 | 
            -
                Status
         | 
| 22 21 | 
             
            )
         | 
| 23 22 |  | 
| 24 | 
            -
            from .api.task import TaskResponse, TaskPayload
         | 
| 23 | 
            +
            from .api.task import TaskResponse, TaskPayload, Status
         | 
| 25 24 |  | 
| 26 25 | 
             
            __all__ = [
         | 
| 27 26 | 
             
                'BoundingBox',
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Metadata-Version: 2.2
         | 
| 2 2 | 
             
            Name: chunkr-ai
         | 
| 3 | 
            -
            Version: 0.0.3
         | 
| 3 | 
            +
            Version: 0.0.5
         | 
| 4 4 | 
             
            Summary: Python client for Chunkr: open source document intelligence
         | 
| 5 5 | 
             
            Author-email: Ishaan Kapoor <ishaan@lumina.sh>
         | 
| 6 6 | 
             
            Project-URL: Homepage, https://chunkr.ai
         | 
| @@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test" | |
| 17 17 |  | 
| 18 18 | 
             
            # Chunkr Python Client
         | 
| 19 19 |  | 
| 20 | 
            -
            This  | 
| 20 | 
            +
            This provides a simple interface to interact with the Chunkr API.
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            ## Getting Started
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
         | 
| 21 27 |  | 
| 22 28 | 
             
            ## Installation
         | 
| 23 29 |  | 
| @@ -102,6 +108,80 @@ chunkr.upload(img) | |
| 102 108 |  | 
| 103 109 | 
             
            ### Configuration
         | 
| 104 110 |  | 
| 111 | 
            +
            You can customize the processing behavior by passing a `Configuration` object:
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            ```python
         | 
| 114 | 
            +
            from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            # Basic configuration
         | 
| 117 | 
            +
            config = Configuration(
         | 
| 118 | 
            +
                ocr_strategy=OcrStrategy.AUTO,
         | 
| 119 | 
            +
                segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
         | 
| 120 | 
            +
                high_resolution=True,
         | 
| 121 | 
            +
                expires_in=3600,  # seconds
         | 
| 122 | 
            +
            )
         | 
| 123 | 
            +
             | 
| 124 | 
            +
            # Upload with configuration
         | 
| 125 | 
            +
            task = chunkr.upload("document.pdf", config)
         | 
| 126 | 
            +
            ```
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            #### Available Configuration Examples
         | 
| 129 | 
            +
             | 
| 130 | 
            +
            - **Chunk Processing**
         | 
| 131 | 
            +
              ```python
         | 
| 132 | 
            +
              from chunkr_ai.models import ChunkProcessing
         | 
| 133 | 
            +
              config = Configuration(
         | 
| 134 | 
            +
                  chunk_processing=ChunkProcessing(target_length=1024)
         | 
| 135 | 
            +
              )
         | 
| 136 | 
            +
              ```
         | 
| 137 | 
            +
            - **Expires In**
         | 
| 138 | 
            +
              ```python
         | 
| 139 | 
            +
              config = Configuration(expires_in=3600)
         | 
| 140 | 
            +
              ```
         | 
| 141 | 
            +
             | 
| 142 | 
            +
            - **High Resolution**
         | 
| 143 | 
            +
              ```python
         | 
| 144 | 
            +
              config = Configuration(high_resolution=True)
         | 
| 145 | 
            +
              ```
         | 
| 146 | 
            +
             | 
| 147 | 
            +
            - **JSON Schema**
         | 
| 148 | 
            +
              ```python
         | 
| 149 | 
            +
              config = Configuration(json_schema=JsonSchema(
         | 
| 150 | 
            +
                  title="Sales Data",
         | 
| 151 | 
            +
                  properties=[
         | 
| 152 | 
            +
                      Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
         | 
| 153 | 
            +
                      Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
         | 
| 154 | 
            +
                  ]
         | 
| 155 | 
            +
              ))
         | 
| 156 | 
            +
              ```
         | 
| 157 | 
            +
             | 
| 158 | 
            +
            - **OCR Strategy**
         | 
| 159 | 
            +
              ```python
         | 
| 160 | 
            +
              config = Configuration(ocr_strategy=OcrStrategy.AUTO)
         | 
| 161 | 
            +
              ```
         | 
| 162 | 
            +
             | 
| 163 | 
            +
            - **Segment Processing**
         | 
| 164 | 
            +
              ```python
         | 
| 165 | 
            +
              from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
         | 
| 166 | 
            +
              config = Configuration(
         | 
| 167 | 
            +
                  segment_processing=SegmentProcessing(
         | 
| 168 | 
            +
                      page=GenerationConfig(
         | 
| 169 | 
            +
                          html=GenerationStrategy.LLM,
         | 
| 170 | 
            +
                          markdown=GenerationStrategy.LLM
         | 
| 171 | 
            +
                      )
         | 
| 172 | 
            +
                  )
         | 
| 173 | 
            +
              )
         | 
| 174 | 
            +
              ```
         | 
| 175 | 
            +
             | 
| 176 | 
            +
            - **Segmentation Strategy**
         | 
| 177 | 
            +
              ```python
         | 
| 178 | 
            +
              config = Configuration(
         | 
| 179 | 
            +
                  segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS  # or SegmentationStrategy.PAGE
         | 
| 180 | 
            +
              )
         | 
| 181 | 
            +
              ```
         | 
| 182 | 
            +
             | 
| 183 | 
            +
            ## Environment setup
         | 
| 184 | 
            +
             | 
| 105 185 | 
             
            You can provide your API key and URL in several ways:
         | 
| 106 186 | 
             
            1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
         | 
| 107 187 | 
             
            2. `.env` file
         | 
| @@ -112,13 +192,3 @@ chunkr = Chunkr( | |
| 112 192 | 
             
                url="https://api.chunkr.ai"
         | 
| 113 193 | 
             
            )
         | 
| 114 194 | 
             
            ```
         | 
| 115 | 
            -
             | 
| 116 | 
            -
            ## Run tests
         | 
| 117 | 
            -
             | 
| 118 | 
            -
            ```python
         | 
| 119 | 
            -
            # Install dependencies
         | 
| 120 | 
            -
            uv pip install -e ".[test]"
         | 
| 121 | 
            -
             | 
| 122 | 
            -
            # Run tests
         | 
| 123 | 
            -
            uv run pytest
         | 
| 124 | 
            -
            ```
         | 
| @@ -1,17 +1,17 @@ | |
| 1 1 | 
             
            chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
         | 
| 2 2 | 
             
            chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 3 | 
            -
            chunkr_ai/models.py,sha256= | 
| 3 | 
            +
            chunkr_ai/models.py,sha256=d-B4vfgZClJOoHdPaH3vagwUc4qxeQSmUxab77DKYtQ,874
         | 
| 4 4 | 
             
            chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 5 5 | 
             
            chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 6 6 | 
             
            chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
         | 
| 7 7 | 
             
            chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
         | 
| 8 8 | 
             
            chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
         | 
| 9 9 | 
             
            chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
         | 
| 10 | 
            -
            chunkr_ai/api/config.py,sha256= | 
| 10 | 
            +
            chunkr_ai/api/config.py,sha256=K0s1giImciPksu-bO9gzRwUaK2Vo1nxNKQkXlRQ2cb8,3785
         | 
| 11 11 | 
             
            chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
         | 
| 12 | 
            -
            chunkr_ai/api/task.py,sha256= | 
| 13 | 
            -
            chunkr_ai-0.0. | 
| 14 | 
            -
            chunkr_ai-0.0. | 
| 15 | 
            -
            chunkr_ai-0.0. | 
| 16 | 
            -
            chunkr_ai-0.0. | 
| 17 | 
            -
            chunkr_ai-0.0. | 
| 12 | 
            +
            chunkr_ai/api/task.py,sha256=ALU-rYlObbitlM1MKEFeSz_IBUpzb9736Iqu9huWg7c,4392
         | 
| 13 | 
            +
            chunkr_ai-0.0.5.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 14 | 
            +
            chunkr_ai-0.0.5.dist-info/METADATA,sha256=Roj63O2Ms3D1vNfgEmnCYAJESFrOQ9nnsSlyXkvORU4,4806
         | 
| 15 | 
            +
            chunkr_ai-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         | 
| 16 | 
            +
            chunkr_ai-0.0.5.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
         | 
| 17 | 
            +
            chunkr_ai-0.0.5.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         |