chunkr-ai 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
chunkr_ai/api/config.py CHANGED
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field
1
+ from pydantic import BaseModel, Field, model_validator
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict
4
4
 
@@ -40,15 +40,14 @@ class ChunkProcessing(BaseModel):
40
40
 
41
41
  class Property(BaseModel):
42
42
  name: str
43
- title: Optional[str]
43
+ title: Optional[str] = None
44
44
  prop_type: str
45
- description: Optional[str]
46
- default: Optional[str]
45
+ description: Optional[str] = None
46
+ default: Optional[str] = None
47
47
 
48
48
  class JsonSchema(BaseModel):
49
49
  title: str
50
50
  properties: List[Property]
51
- schema_type: Optional[str]
52
51
 
53
52
  class OcrStrategy(str, Enum):
54
53
  ALL = "All"
@@ -121,10 +120,12 @@ class Configuration(BaseModel):
121
120
  ocr_strategy: Optional[OcrStrategy] = Field(default=None)
122
121
  segment_processing: Optional[SegmentProcessing] = Field(default=None)
123
122
  segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
124
- target_chunk_length: Optional[int] = Field(default=None)
125
123
 
126
- class Status(str, Enum):
127
- STARTING = "Starting"
128
- PROCESSING = "Processing"
129
- SUCCEEDED = "Succeeded"
130
- FAILED = "Failed"
124
+ @model_validator(mode='before')
125
+ def map_deprecated_fields(cls, values: Dict) -> Dict:
126
+ if isinstance(values, dict) and "target_chunk_length" in values:
127
+ target_length = values.pop("target_chunk_length")
128
+ if target_length is not None:
129
+ values["chunk_processing"] = values.get("chunk_processing", {}) or {}
130
+ values["chunk_processing"]["target_length"] = target_length
131
+ return values
chunkr_ai/api/task.py CHANGED
@@ -1,11 +1,18 @@
1
1
  from .protocol import ChunkrClientProtocol
2
- from .config import Configuration, Status, OutputResponse
2
+ from .config import Configuration, OutputResponse
3
3
  import asyncio
4
4
  from datetime import datetime
5
+ from enum import Enum
5
6
  from pydantic import BaseModel, PrivateAttr
6
7
  import time
7
8
  from typing import Optional, Union
8
9
 
10
+ class Status(str, Enum):
11
+ STARTING = "Starting"
12
+ PROCESSING = "Processing"
13
+ SUCCEEDED = "Succeeded"
14
+ FAILED = "Failed"
15
+
9
16
  class TaskResponse(BaseModel):
10
17
  configuration: Configuration
11
18
  created_at: datetime
chunkr_ai/models.py CHANGED
@@ -18,10 +18,9 @@ from .api.config import (
18
18
  SegmentProcessing,
19
19
  SegmentType,
20
20
  SegmentationStrategy,
21
- Status
22
21
  )
23
22
 
24
- from .api.task import TaskResponse, TaskPayload
23
+ from .api.task import TaskResponse, TaskPayload, Status
25
24
 
26
25
  __all__ = [
27
26
  'BoundingBox',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
17
17
 
18
18
  # Chunkr Python Client
19
19
 
20
- This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.
20
+ This provides a simple interface to interact with the Chunkr API.
21
+
22
+ ## Getting Started
23
+
24
+ You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
25
+
26
+ For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
21
27
 
22
28
  ## Installation
23
29
 
@@ -102,6 +108,80 @@ chunkr.upload(img)
102
108
 
103
109
  ### Configuration
104
110
 
111
+ You can customize the processing behavior by passing a `Configuration` object:
112
+
113
+ ```python
114
+ from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
115
+
116
+ # Basic configuration
117
+ config = Configuration(
118
+ ocr_strategy=OcrStrategy.AUTO,
119
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
120
+ high_resolution=True,
121
+ expires_in=3600, # seconds
122
+ )
123
+
124
+ # Upload with configuration
125
+ task = chunkr.upload("document.pdf", config)
126
+ ```
127
+
128
+ #### Available Configuration Examples
129
+
130
+ - **Chunk Processing**
131
+ ```python
132
+ from chunkr_ai.models import ChunkProcessing
133
+ config = Configuration(
134
+ chunk_processing=ChunkProcessing(target_length=1024)
135
+ )
136
+ ```
137
+ - **Expires In**
138
+ ```python
139
+ config = Configuration(expires_in=3600)
140
+ ```
141
+
142
+ - **High Resolution**
143
+ ```python
144
+ config = Configuration(high_resolution=True)
145
+ ```
146
+
147
+ - **JSON Schema**
148
+ ```python
149
+ config = Configuration(json_schema=JsonSchema(
150
+ title="Sales Data",
151
+ properties=[
152
+ Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
153
+ Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
154
+ ]
155
+ ))
156
+ ```
157
+
158
+ - **OCR Strategy**
159
+ ```python
160
+ config = Configuration(ocr_strategy=OcrStrategy.AUTO)
161
+ ```
162
+
163
+ - **Segment Processing**
164
+ ```python
165
+ from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
166
+ config = Configuration(
167
+ segment_processing=SegmentProcessing(
168
+ page=GenerationConfig(
169
+ html=GenerationStrategy.LLM,
170
+ markdown=GenerationStrategy.LLM
171
+ )
172
+ )
173
+ )
174
+ ```
175
+
176
+ - **Segmentation Strategy**
177
+ ```python
178
+ config = Configuration(
179
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
180
+ )
181
+ ```
182
+
183
+ ## Environment setup
184
+
105
185
  You can provide your API key and URL in several ways:
106
186
  1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
107
187
  2. `.env` file
@@ -112,13 +192,3 @@ chunkr = Chunkr(
112
192
  url="https://api.chunkr.ai"
113
193
  )
114
194
  ```
115
-
116
- ## Run tests
117
-
118
- ```python
119
- # Install dependencies
120
- uv pip install -e ".[test]"
121
-
122
- # Run tests
123
- uv run pytest
124
- ```
@@ -1,17 +1,17 @@
1
1
  chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
2
  chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- chunkr_ai/models.py,sha256=DIKuoLOes6CXIcAQIYDUEZLPUZOT7KKndXu2_ZwNMsk,877
3
+ chunkr_ai/models.py,sha256=d-B4vfgZClJOoHdPaH3vagwUc4qxeQSmUxab77DKYtQ,874
4
4
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
7
  chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
8
8
  chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
9
9
  chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
10
- chunkr_ai/api/config.py,sha256=z7Udwwlw7YD3LahLbgq2fht5v16evK7UBPTgocfxylw,3514
10
+ chunkr_ai/api/config.py,sha256=K0s1giImciPksu-bO9gzRwUaK2Vo1nxNKQkXlRQ2cb8,3785
11
11
  chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
- chunkr_ai/api/task.py,sha256=5Mg5u3CVJxmKBKPauogOWX8UEG6zv7-mNd7JHioUILk,4242
13
- chunkr_ai-0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.3.dist-info/METADATA,sha256=vQM4TeWfpaBh3r5ZxVS-S6nROAVoahLnMj2qy3UwgyA,2685
15
- chunkr_ai-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.3.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.3.dist-info/RECORD,,
12
+ chunkr_ai/api/task.py,sha256=ALU-rYlObbitlM1MKEFeSz_IBUpzb9736Iqu9huWg7c,4392
13
+ chunkr_ai-0.0.5.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ chunkr_ai-0.0.5.dist-info/METADATA,sha256=Roj63O2Ms3D1vNfgEmnCYAJESFrOQ9nnsSlyXkvORU4,4806
15
+ chunkr_ai-0.0.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.5.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.5.dist-info/RECORD,,