PyPI - chunkr-ai - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

chunkr-ai 0.0.3-py3-none-any.whl → 0.0.5-py3-none-any.whl

Files changed (8) hide show

chunkr_ai/api/config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from enum import Enum
 from typing import Optional, List, Dict
@@ -40,15 +40,14 @@ class ChunkProcessing(BaseModel):
 class Property(BaseModel):
     name: str
-    title: Optional[str]
+    title: Optional[str] = None
     prop_type: str
-    description: Optional[str]
-    default: Optional[str]
+    description: Optional[str] = None
+    default: Optional[str] = None
 class JsonSchema(BaseModel):
     title: str
     properties: List[Property]
-    schema_type: Optional[str]
 class OcrStrategy(str, Enum):
     ALL = "All"
@@ -121,10 +120,12 @@ class Configuration(BaseModel):
     ocr_strategy: Optional[OcrStrategy] = Field(default=None)
     segment_processing: Optional[SegmentProcessing] = Field(default=None)
     segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
-    target_chunk_length: Optional[int] = Field(default=None)
-class Status(str, Enum):
-    STARTING = "Starting"
-    PROCESSING = "Processing"
-    SUCCEEDED = "Succeeded"
-    FAILED = "Failed"
+    @model_validator(mode='before')
+    def map_deprecated_fields(cls, values: Dict) -> Dict:
+        if isinstance(values, dict) and "target_chunk_length" in values:
+            target_length = values.pop("target_chunk_length")
+            if target_length is not None:
+                values["chunk_processing"] = values.get("chunk_processing", {}) or {}
+                values["chunk_processing"]["target_length"] = target_length
+        return values

chunkr_ai/api/task.py CHANGED Viewed

@@ -1,11 +1,18 @@
 from .protocol import ChunkrClientProtocol
-from .config import Configuration, Status, OutputResponse
+from .config import Configuration, OutputResponse
 import asyncio
 from datetime import datetime
+from enum import Enum
 from pydantic import BaseModel, PrivateAttr
 import time
 from typing import Optional, Union
+class Status(str, Enum):
+    STARTING = "Starting"
+    PROCESSING = "Processing"
+    SUCCEEDED = "Succeeded"
+    FAILED = "Failed"
 class TaskResponse(BaseModel):
     configuration: Configuration
     created_at: datetime

chunkr_ai/models.py CHANGED Viewed

@@ -18,10 +18,9 @@ from .api.config import (
     SegmentProcessing,
     SegmentType,
     SegmentationStrategy,
-    Status
 )
-from .api.task import TaskResponse, TaskPayload
+from .api.task import TaskResponse, TaskPayload, Status
 __all__ = [
     'BoundingBox',

{chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: chunkr-ai
-Version: 0.0.3
+Version: 0.0.5
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 Project-URL: Homepage, https://chunkr.ai
@@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
 # Chunkr Python Client
-This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.
+This provides a simple interface to interact with the Chunkr API.
+## Getting Started
+You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
+For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
 ## Installation
@@ -102,6 +108,80 @@ chunkr.upload(img)
 ### Configuration
+You can customize the processing behavior by passing a `Configuration` object:
+```python
+from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
+# Basic configuration
+config = Configuration(
+    ocr_strategy=OcrStrategy.AUTO,
+    segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
+    high_resolution=True,
+    expires_in=3600,  # seconds
+)
+# Upload with configuration
+task = chunkr.upload("document.pdf", config)
+```
+#### Available Configuration Examples
+- **Chunk Processing**
+  ```python
+  from chunkr_ai.models import ChunkProcessing
+  config = Configuration(
+      chunk_processing=ChunkProcessing(target_length=1024)
+  )
+  ```
+- **Expires In**
+  ```python
+  config = Configuration(expires_in=3600)
+  ```
+- **High Resolution**
+  ```python
+  config = Configuration(high_resolution=True)
+  ```
+- **JSON Schema**
+  ```python
+  config = Configuration(json_schema=JsonSchema(
+      title="Sales Data",
+      properties=[
+          Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
+          Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
+      ]
+  ))
+  ```
+- **OCR Strategy**
+  ```python
+  config = Configuration(ocr_strategy=OcrStrategy.AUTO)
+  ```
+- **Segment Processing**
+  ```python
+  from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
+  config = Configuration(
+      segment_processing=SegmentProcessing(
+          page=GenerationConfig(
+              html=GenerationStrategy.LLM,
+              markdown=GenerationStrategy.LLM
+          )
+      )
+  )
+  ```
+- **Segmentation Strategy**
+  ```python
+  config = Configuration(
+      segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS  # or SegmentationStrategy.PAGE
+  )
+  ```
+## Environment setup
 You can provide your API key and URL in several ways:
 1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
 2. `.env` file
@@ -112,13 +192,3 @@ chunkr = Chunkr(
     url="https://api.chunkr.ai"
 )
 ```
-## Run tests
-```python
-# Install dependencies
-uv pip install -e ".[test]"
-# Run tests
-uv run pytest
-```

{chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
 chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chunkr_ai/models.py,sha256=DIKuoLOes6CXIcAQIYDUEZLPUZOT7KKndXu2_ZwNMsk,877
+chunkr_ai/models.py,sha256=d-B4vfgZClJOoHdPaH3vagwUc4qxeQSmUxab77DKYtQ,874
 chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
 chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
 chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
 chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
-chunkr_ai/api/config.py,sha256=z7Udwwlw7YD3LahLbgq2fht5v16evK7UBPTgocfxylw,3514
+chunkr_ai/api/config.py,sha256=K0s1giImciPksu-bO9gzRwUaK2Vo1nxNKQkXlRQ2cb8,3785
 chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
-chunkr_ai/api/task.py,sha256=5Mg5u3CVJxmKBKPauogOWX8UEG6zv7-mNd7JHioUILk,4242
-chunkr_ai-0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chunkr_ai-0.0.3.dist-info/METADATA,sha256=vQM4TeWfpaBh3r5ZxVS-S6nROAVoahLnMj2qy3UwgyA,2685
-chunkr_ai-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-chunkr_ai-0.0.3.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
-chunkr_ai-0.0.3.dist-info/RECORD,,
+chunkr_ai/api/task.py,sha256=ALU-rYlObbitlM1MKEFeSz_IBUpzb9736Iqu9huWg7c,4392
+chunkr_ai-0.0.5.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+chunkr_ai-0.0.5.dist-info/METADATA,sha256=Roj63O2Ms3D1vNfgEmnCYAJESFrOQ9nnsSlyXkvORU4,4806
+chunkr_ai-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+chunkr_ai-0.0.5.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
+chunkr_ai-0.0.5.dist-info/RECORD,,

{chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{chunkr_ai-0.0.3.dist-info → chunkr_ai-0.0.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

chunkr-ai 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

chunkr-ai 0.0.3-py3-none-any.whl → 0.0.5-py3-none-any.whl