chunkr-ai 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  from pydantic import BaseModel, Field, ConfigDict
2
2
  from enum import Enum
3
3
  from typing import Any, List, Optional, Union
4
- from pydantic import field_validator
4
+ from pydantic import field_validator, field_serializer
5
5
 
6
6
  class GenerationStrategy(str, Enum):
7
7
  LLM = "LLM"
@@ -65,11 +65,7 @@ class TokenizerType(BaseModel):
65
65
  return f"string:{self.string_value}"
66
66
  return ""
67
67
 
68
- model_config = ConfigDict(
69
- json_encoders={
70
- 'TokenizerType': lambda v: v.model_dump()
71
- }
72
- )
68
+ model_config = ConfigDict()
73
69
 
74
70
  def model_dump(self, **kwargs):
75
71
  if self.enum_value is not None:
@@ -85,10 +81,13 @@ class ChunkProcessing(BaseModel):
85
81
 
86
82
  model_config = ConfigDict(
87
83
  arbitrary_types_allowed=True,
88
- json_encoders={
89
- TokenizerType: lambda v: v.model_dump()
90
- }
91
84
  )
85
+
86
+ @field_serializer('tokenizer')
87
+ def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
88
+ if tokenizer is None:
89
+ return None
90
+ return tokenizer.model_dump()
92
91
 
93
92
  @field_validator('tokenizer', mode='before')
94
93
  def validate_tokenizer(cls, v):
@@ -126,6 +125,99 @@ class SegmentationStrategy(str, Enum):
126
125
  LAYOUT_ANALYSIS = "LayoutAnalysis"
127
126
  PAGE = "Page"
128
127
 
128
+ class ErrorHandlingStrategy(str, Enum):
129
+ FAIL = "Fail"
130
+ CONTINUE = "Continue"
131
+
132
+ class FallbackStrategy(BaseModel):
133
+ type: str
134
+ model_id: Optional[str] = None
135
+
136
+ @classmethod
137
+ def none(cls) -> "FallbackStrategy":
138
+ return cls(type="None")
139
+
140
+ @classmethod
141
+ def default(cls) -> "FallbackStrategy":
142
+ return cls(type="Default")
143
+
144
+ @classmethod
145
+ def model(cls, model_id: str) -> "FallbackStrategy":
146
+ return cls(type="Model", model_id=model_id)
147
+
148
+ def __str__(self) -> str:
149
+ if self.type == "Model":
150
+ return f"Model({self.model_id})"
151
+ return self.type
152
+
153
+ def model_dump(self, **kwargs):
154
+ if self.type == "Model":
155
+ return {"Model": self.model_id}
156
+ return self.type
157
+
158
+ @field_validator('type')
159
+ def validate_type(cls, v):
160
+ if v not in ["None", "Default", "Model"]:
161
+ raise ValueError(f"Invalid fallback strategy: {v}")
162
+ return v
163
+
164
+ model_config = ConfigDict()
165
+
166
+ @classmethod
167
+ def model_validate(cls, obj):
168
+ # Handle string values like "None" or "Default"
169
+ if isinstance(obj, str):
170
+ if obj in ["None", "Default"]:
171
+ return cls(type=obj)
172
+ # Try to parse as Enum value if it's not a direct match
173
+ try:
174
+ return cls(type=obj)
175
+ except ValueError:
176
+ pass # Let it fall through to normal validation
177
+
178
+ # Handle dictionary format like {"Model": "model-id"}
179
+ elif isinstance(obj, dict) and len(obj) == 1:
180
+ if "Model" in obj:
181
+ return cls(type="Model", model_id=obj["Model"])
182
+
183
+ # Fall back to normal validation
184
+ return super().model_validate(obj)
185
+
186
+ class LlmProcessing(BaseModel):
187
+ model_id: Optional[str] = None
188
+ fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
189
+ max_completion_tokens: Optional[int] = None
190
+ temperature: float = 0.0
191
+
192
+ model_config = ConfigDict()
193
+
194
+ @field_serializer('fallback_strategy')
195
+ def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
196
+ return fallback_strategy.model_dump()
197
+
198
+ @field_validator('fallback_strategy', mode='before')
199
+ def validate_fallback_strategy(cls, v):
200
+ if isinstance(v, str):
201
+ if v == "None":
202
+ return FallbackStrategy.none()
203
+ elif v == "Default":
204
+ return FallbackStrategy.default()
205
+ # Try to parse as a model ID if it's not None or Default
206
+ try:
207
+ return FallbackStrategy.model(v)
208
+ except ValueError:
209
+ pass # Let it fall through to normal validation
210
+ # Handle dictionary format like {"Model": "model-id"}
211
+ elif isinstance(v, dict) and len(v) == 1:
212
+ if "Model" in v:
213
+ return FallbackStrategy.model(v["Model"])
214
+ elif "None" in v or v.get("None") is None:
215
+ return FallbackStrategy.none()
216
+ elif "Default" in v or v.get("Default") is None:
217
+ return FallbackStrategy.default()
218
+
219
+ return v
220
+
129
221
  class BoundingBox(BaseModel):
130
222
  left: float
131
223
  top: float
@@ -189,11 +281,13 @@ class Pipeline(str, Enum):
189
281
  class Configuration(BaseModel):
190
282
  chunk_processing: Optional[ChunkProcessing] = None
191
283
  expires_in: Optional[int] = None
284
+ error_handling: Optional[ErrorHandlingStrategy] = None
192
285
  high_resolution: Optional[bool] = None
193
286
  ocr_strategy: Optional[OcrStrategy] = None
194
287
  segment_processing: Optional[SegmentProcessing] = None
195
288
  segmentation_strategy: Optional[SegmentationStrategy] = None
196
289
  pipeline: Optional[Pipeline] = None
290
+ llm_processing: Optional[LlmProcessing] = None
197
291
 
198
292
  class OutputConfiguration(Configuration):
199
293
  input_file_url: Optional[str] = None
chunkr_ai/models.py CHANGED
@@ -5,8 +5,11 @@ from .api.configuration import (
5
5
  Configuration,
6
6
  CroppingStrategy,
7
7
  EmbedSource,
8
+ ErrorHandlingStrategy,
9
+ FallbackStrategy,
8
10
  GenerationStrategy,
9
11
  GenerationConfig,
12
+ LlmProcessing,
10
13
  Model,
11
14
  OCRResult,
12
15
  OcrStrategy,
@@ -29,8 +32,11 @@ __all__ = [
29
32
  "Configuration",
30
33
  "CroppingStrategy",
31
34
  "EmbedSource",
35
+ "ErrorHandlingStrategy",
36
+ "FallbackStrategy",
32
37
  "GenerationConfig",
33
38
  "GenerationStrategy",
39
+ "LlmProcessing",
34
40
  "Model",
35
41
  "OCRResult",
36
42
  "OcrStrategy",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.0.44
3
+ Version: 0.0.46
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -1,16 +1,16 @@
1
1
  chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=Pfr8S0qbC5GSgI3zCE63bnBCWOOjiExBXIrSRPyLhkc,864
2
+ chunkr_ai/models.py,sha256=L0L9CjY8SgSh9_Fzvo_nJXqKf_2urZHngMWtBVlAQAo,1006
3
3
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
5
5
  chunkr_ai/api/chunkr.py,sha256=BzwcKNCuLfVR-HzgY8tKStsW4pIDVVjBgnEqPLyUUMM,3292
6
6
  chunkr_ai/api/chunkr_base.py,sha256=FDl0Ew8eOY4hur5FFqPENZiq9YQy0G3XWEqcKPeCO-U,6130
7
- chunkr_ai/api/configuration.py,sha256=jHEAz3H9uRh22jpSqnGyCdT4VbkCE_L_1fm0uVlv_1U,6527
7
+ chunkr_ai/api/configuration.py,sha256=aCYi_NjuTDynDc6g_N94jVGTb8SQQaUQ4LM8_a5v29g,9882
8
8
  chunkr_ai/api/decorators.py,sha256=VJX4qGBIL00K2zY8bh5KAMWv7SltJ38TvPJH06FnFss,4415
9
9
  chunkr_ai/api/misc.py,sha256=QN-2YWQ8e3VvvK63Ua-e8jsx6gxVxkO88Z96yWOofu0,3653
10
10
  chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
11
  chunkr_ai/api/task_response.py,sha256=6kk9g2f7OZB3PAsmp4Or5A42r1dXTAzWAHEIVtLQ9sA,6545
12
- chunkr_ai-0.0.44.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
- chunkr_ai-0.0.44.dist-info/METADATA,sha256=Gk-DiU78MVBXlYk3BjcKCVGfy1JL-SB0wj8p-ooovYs,7053
14
- chunkr_ai-0.0.44.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
- chunkr_ai-0.0.44.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
- chunkr_ai-0.0.44.dist-info/RECORD,,
12
+ chunkr_ai-0.0.46.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.0.46.dist-info/METADATA,sha256=Zjo2enHVCP5x0QqMTcS0k20nAWKogUoL88LZEVFoMZ8,7053
14
+ chunkr_ai-0.0.46.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
+ chunkr_ai-0.0.46.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.0.46.dist-info/RECORD,,