chunkr-ai 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
chunkr_ai/api/config.py CHANGED
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, Field
1
+ from pydantic import BaseModel, Field, model_validator
2
2
  from enum import Enum
3
3
  from typing import Optional, List, Dict
4
4
 
@@ -40,15 +40,14 @@ class ChunkProcessing(BaseModel):
40
40
 
41
41
  class Property(BaseModel):
42
42
  name: str
43
- title: Optional[str]
43
+ title: Optional[str] = None
44
44
  prop_type: str
45
- description: Optional[str]
46
- default: Optional[str]
45
+ description: Optional[str] = None
46
+ default: Optional[str] = None
47
47
 
48
48
  class JsonSchema(BaseModel):
49
49
  title: str
50
50
  properties: List[Property]
51
- schema_type: Optional[str]
52
51
 
53
52
  class OcrStrategy(str, Enum):
54
53
  ALL = "All"
@@ -121,10 +120,12 @@ class Configuration(BaseModel):
121
120
  ocr_strategy: Optional[OcrStrategy] = Field(default=None)
122
121
  segment_processing: Optional[SegmentProcessing] = Field(default=None)
123
122
  segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
124
- target_chunk_length: Optional[int] = Field(default=None)
125
123
 
126
- class Status(str, Enum):
127
- STARTING = "Starting"
128
- PROCESSING = "Processing"
129
- SUCCEEDED = "Succeeded"
130
- FAILED = "Failed"
124
+ @model_validator(mode='before')
125
+ def map_deprecated_fields(cls, values: Dict) -> Dict:
126
+ if isinstance(values, dict) and "target_chunk_length" in values:
127
+ target_length = values.pop("target_chunk_length")
128
+ if target_length is not None:
129
+ values["chunk_processing"] = values.get("chunk_processing", {}) or {}
130
+ values["chunk_processing"]["target_length"] = target_length
131
+ return values
chunkr_ai/api/task.py CHANGED
@@ -1,11 +1,18 @@
1
1
  from .protocol import ChunkrClientProtocol
2
- from .config import Configuration, Status, OutputResponse
2
+ from .config import Configuration, OutputResponse
3
3
  import asyncio
4
4
  from datetime import datetime
5
+ from enum import Enum
5
6
  from pydantic import BaseModel, PrivateAttr
6
7
  import time
7
8
  from typing import Optional, Union
8
9
 
10
+ class Status(str, Enum):
11
+ STARTING = "Starting"
12
+ PROCESSING = "Processing"
13
+ SUCCEEDED = "Succeeded"
14
+ FAILED = "Failed"
15
+
9
16
  class TaskResponse(BaseModel):
10
17
  configuration: Configuration
11
18
  created_at: datetime
chunkr_ai/models.py CHANGED
@@ -18,10 +18,9 @@ from .api.config import (
18
18
  SegmentProcessing,
19
19
  SegmentType,
20
20
  SegmentationStrategy,
21
- Status
22
21
  )
23
22
 
24
- from .api.task import TaskResponse, TaskPayload
23
+ from .api.task import TaskResponse, TaskPayload, Status
25
24
 
26
25
  __all__ = [
27
26
  'BoundingBox',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
17
17
 
18
18
  # Chunkr Python Client
19
19
 
20
- This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.
20
+ This package provides a simple interface for interacting with the Chunkr API.
21
+
22
+ ## Getting Started
23
+
24
+ You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
25
+
26
+ For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
21
27
 
22
28
  ## Installation
23
29
 
@@ -102,6 +108,80 @@ chunkr.upload(img)
102
108
 
103
109
  ### Configuration
104
110
 
111
+ You can customize the processing behavior by passing a `Configuration` object:
112
+
113
+ ```python
114
+ from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
115
+
116
+ # Basic configuration
117
+ config = Configuration(
118
+ ocr_strategy=OcrStrategy.AUTO,
119
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
120
+ high_resolution=True,
121
+ expires_in=3600, # seconds
122
+ )
123
+
124
+ # Upload with configuration
125
+ task = chunkr.upload("document.pdf", config)
126
+ ```
127
+
128
+ #### Available Configuration Examples
129
+
130
+ - **Chunk Processing**
131
+ ```python
132
+ from chunkr_ai.models import ChunkProcessing
133
+ config = Configuration(
134
+ chunk_processing=ChunkProcessing(target_length=1024)
135
+ )
136
+ ```
137
+ - **Expires In**
138
+ ```python
139
+ config = Configuration(expires_in=3600)
140
+ ```
141
+
142
+ - **High Resolution**
143
+ ```python
144
+ config = Configuration(high_resolution=True)
145
+ ```
146
+
147
+ - **JSON Schema**
148
+ ```python
149
+ config = Configuration(json_schema=JsonSchema(
150
+ title="Sales Data",
151
+ properties=[
152
+ Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
153
+ Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
154
+ ]
155
+ ))
156
+ ```
157
+
158
+ - **OCR Strategy**
159
+ ```python
160
+ config = Configuration(ocr_strategy=OcrStrategy.AUTO)
161
+ ```
162
+
163
+ - **Segment Processing**
164
+ ```python
165
+ from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
166
+ config = Configuration(
167
+ segment_processing=SegmentProcessing(
168
+ page=GenerationConfig(
169
+ html=GenerationStrategy.LLM,
170
+ markdown=GenerationStrategy.LLM
171
+ )
172
+ )
173
+ )
174
+ ```
175
+
176
+ - **Segmentation Strategy**
177
+ ```python
178
+ config = Configuration(
179
+ segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
180
+ )
181
+ ```
182
+
183
+ ## Environment Setup
184
+
105
185
  You can provide your API key and URL in several ways:
106
186
  1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
107
187
  2. `.env` file
@@ -1,17 +1,17 @@
1
1
  chunkr_ai/__init__.py,sha256=eXygrEhGxxIHXNYIlHF2eied8rGsx2RphgR8Wo4lRyo,110
2
2
  chunkr_ai/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- chunkr_ai/models.py,sha256=DIKuoLOes6CXIcAQIYDUEZLPUZOT7KKndXu2_ZwNMsk,877
3
+ chunkr_ai/models.py,sha256=d-B4vfgZClJOoHdPaH3vagwUc4qxeQSmUxab77DKYtQ,874
4
4
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  chunkr_ai/api/api.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  chunkr_ai/api/auth.py,sha256=iSd5Jek2BFaHGw9HY-RrqgwP56BHFU0xbSuJS4fU6AA,425
7
7
  chunkr_ai/api/base.py,sha256=WDHx8tU0fl9_-yvYTKL-U0uaxHv-8_bRfiw9Xkl-mWM,6499
8
8
  chunkr_ai/api/chunkr.py,sha256=LkBFzGB_T0y3fnBeIn_nwQW6Mb7eZO-iTlzWrmWBoko,3450
9
9
  chunkr_ai/api/chunkr_async.py,sha256=B9deRVoe4h3Csh_jEuQxuxQ-DKSuZPdwkanFTyfHmeM,3603
10
- chunkr_ai/api/config.py,sha256=z7Udwwlw7YD3LahLbgq2fht5v16evK7UBPTgocfxylw,3514
10
+ chunkr_ai/api/config.py,sha256=K0s1giImciPksu-bO9gzRwUaK2Vo1nxNKQkXlRQ2cb8,3785
11
11
  chunkr_ai/api/protocol.py,sha256=XKS9RmtvBpJItYhPg18qlOCKpaSHdOuQTRSUxAdUz2g,479
12
- chunkr_ai/api/task.py,sha256=5Mg5u3CVJxmKBKPauogOWX8UEG6zv7-mNd7JHioUILk,4242
13
- chunkr_ai-0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.3.dist-info/METADATA,sha256=vQM4TeWfpaBh3r5ZxVS-S6nROAVoahLnMj2qy3UwgyA,2685
15
- chunkr_ai-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.3.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.3.dist-info/RECORD,,
12
+ chunkr_ai/api/task.py,sha256=ALU-rYlObbitlM1MKEFeSz_IBUpzb9736Iqu9huWg7c,4392
13
+ chunkr_ai-0.0.4.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ chunkr_ai-0.0.4.dist-info/METADATA,sha256=7k2zij-F7_Kcs6nFCJMKQW382gFpOOLAnZoOOXFrKFs,4913
15
+ chunkr_ai-0.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.4.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.4.dist-info/RECORD,,