chunkr-ai 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/configuration.py +103 -9
- chunkr_ai/models.py +6 -0
- {chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/METADATA +1 -1
- {chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/RECORD +7 -7
- {chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/licenses/LICENSE +0 -0
- {chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/top_level.txt +0 -0
chunkr_ai/api/configuration.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from pydantic import BaseModel, Field, ConfigDict
|
2
2
|
from enum import Enum
|
3
3
|
from typing import Any, List, Optional, Union
|
4
|
-
from pydantic import field_validator
|
4
|
+
from pydantic import field_validator, field_serializer
|
5
5
|
|
6
6
|
class GenerationStrategy(str, Enum):
|
7
7
|
LLM = "LLM"
|
@@ -65,11 +65,7 @@ class TokenizerType(BaseModel):
|
|
65
65
|
return f"string:{self.string_value}"
|
66
66
|
return ""
|
67
67
|
|
68
|
-
model_config = ConfigDict(
|
69
|
-
json_encoders={
|
70
|
-
'TokenizerType': lambda v: v.model_dump()
|
71
|
-
}
|
72
|
-
)
|
68
|
+
model_config = ConfigDict()
|
73
69
|
|
74
70
|
def model_dump(self, **kwargs):
|
75
71
|
if self.enum_value is not None:
|
@@ -85,10 +81,13 @@ class ChunkProcessing(BaseModel):
|
|
85
81
|
|
86
82
|
model_config = ConfigDict(
|
87
83
|
arbitrary_types_allowed=True,
|
88
|
-
json_encoders={
|
89
|
-
TokenizerType: lambda v: v.model_dump()
|
90
|
-
}
|
91
84
|
)
|
85
|
+
|
86
|
+
@field_serializer('tokenizer')
|
87
|
+
def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
|
88
|
+
if tokenizer is None:
|
89
|
+
return None
|
90
|
+
return tokenizer.model_dump()
|
92
91
|
|
93
92
|
@field_validator('tokenizer', mode='before')
|
94
93
|
def validate_tokenizer(cls, v):
|
@@ -126,6 +125,99 @@ class SegmentationStrategy(str, Enum):
|
|
126
125
|
LAYOUT_ANALYSIS = "LayoutAnalysis"
|
127
126
|
PAGE = "Page"
|
128
127
|
|
128
|
+
class ErrorHandlingStrategy(str, Enum):
|
129
|
+
FAIL = "Fail"
|
130
|
+
CONTINUE = "Continue"
|
131
|
+
|
132
|
+
class FallbackStrategy(BaseModel):
|
133
|
+
type: str
|
134
|
+
model_id: Optional[str] = None
|
135
|
+
|
136
|
+
@classmethod
|
137
|
+
def none(cls) -> "FallbackStrategy":
|
138
|
+
return cls(type="None")
|
139
|
+
|
140
|
+
@classmethod
|
141
|
+
def default(cls) -> "FallbackStrategy":
|
142
|
+
return cls(type="Default")
|
143
|
+
|
144
|
+
@classmethod
|
145
|
+
def model(cls, model_id: str) -> "FallbackStrategy":
|
146
|
+
return cls(type="Model", model_id=model_id)
|
147
|
+
|
148
|
+
def __str__(self) -> str:
|
149
|
+
if self.type == "Model":
|
150
|
+
return f"Model({self.model_id})"
|
151
|
+
return self.type
|
152
|
+
|
153
|
+
def model_dump(self, **kwargs):
|
154
|
+
if self.type == "Model":
|
155
|
+
return {"Model": self.model_id}
|
156
|
+
return self.type
|
157
|
+
|
158
|
+
@field_validator('type')
|
159
|
+
def validate_type(cls, v):
|
160
|
+
if v not in ["None", "Default", "Model"]:
|
161
|
+
raise ValueError(f"Invalid fallback strategy: {v}")
|
162
|
+
return v
|
163
|
+
|
164
|
+
model_config = ConfigDict()
|
165
|
+
|
166
|
+
@classmethod
|
167
|
+
def model_validate(cls, obj):
|
168
|
+
# Handle string values like "None" or "Default"
|
169
|
+
if isinstance(obj, str):
|
170
|
+
if obj in ["None", "Default"]:
|
171
|
+
return cls(type=obj)
|
172
|
+
# Try to parse as Enum value if it's not a direct match
|
173
|
+
try:
|
174
|
+
return cls(type=obj)
|
175
|
+
except ValueError:
|
176
|
+
pass # Let it fall through to normal validation
|
177
|
+
|
178
|
+
# Handle dictionary format like {"Model": "model-id"}
|
179
|
+
elif isinstance(obj, dict) and len(obj) == 1:
|
180
|
+
if "Model" in obj:
|
181
|
+
return cls(type="Model", model_id=obj["Model"])
|
182
|
+
|
183
|
+
# Fall back to normal validation
|
184
|
+
return super().model_validate(obj)
|
185
|
+
|
186
|
+
class LlmProcessing(BaseModel):
|
187
|
+
model_id: Optional[str] = None
|
188
|
+
fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
|
189
|
+
max_completion_tokens: Optional[int] = None
|
190
|
+
temperature: float = 0.0
|
191
|
+
|
192
|
+
model_config = ConfigDict()
|
193
|
+
|
194
|
+
@field_serializer('fallback_strategy')
|
195
|
+
def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
|
196
|
+
return fallback_strategy.model_dump()
|
197
|
+
|
198
|
+
@field_validator('fallback_strategy', mode='before')
|
199
|
+
def validate_fallback_strategy(cls, v):
|
200
|
+
if isinstance(v, str):
|
201
|
+
if v == "None":
|
202
|
+
return FallbackStrategy.none()
|
203
|
+
elif v == "Default":
|
204
|
+
return FallbackStrategy.default()
|
205
|
+
# Try to parse as a model ID if it's not None or Default
|
206
|
+
try:
|
207
|
+
return FallbackStrategy.model(v)
|
208
|
+
except ValueError:
|
209
|
+
pass # Let it fall through to normal validation
|
210
|
+
# Handle dictionary format like {"Model": "model-id"}
|
211
|
+
elif isinstance(v, dict) and len(v) == 1:
|
212
|
+
if "Model" in v:
|
213
|
+
return FallbackStrategy.model(v["Model"])
|
214
|
+
elif "None" in v or v.get("None") is None:
|
215
|
+
return FallbackStrategy.none()
|
216
|
+
elif "Default" in v or v.get("Default") is None:
|
217
|
+
return FallbackStrategy.default()
|
218
|
+
|
219
|
+
return v
|
220
|
+
|
129
221
|
class BoundingBox(BaseModel):
|
130
222
|
left: float
|
131
223
|
top: float
|
@@ -189,11 +281,13 @@ class Pipeline(str, Enum):
|
|
189
281
|
class Configuration(BaseModel):
|
190
282
|
chunk_processing: Optional[ChunkProcessing] = None
|
191
283
|
expires_in: Optional[int] = None
|
284
|
+
error_handling: Optional[ErrorHandlingStrategy] = None
|
192
285
|
high_resolution: Optional[bool] = None
|
193
286
|
ocr_strategy: Optional[OcrStrategy] = None
|
194
287
|
segment_processing: Optional[SegmentProcessing] = None
|
195
288
|
segmentation_strategy: Optional[SegmentationStrategy] = None
|
196
289
|
pipeline: Optional[Pipeline] = None
|
290
|
+
llm_processing: Optional[LlmProcessing] = None
|
197
291
|
|
198
292
|
class OutputConfiguration(Configuration):
|
199
293
|
input_file_url: Optional[str] = None
|
chunkr_ai/models.py
CHANGED
@@ -5,8 +5,11 @@ from .api.configuration import (
|
|
5
5
|
Configuration,
|
6
6
|
CroppingStrategy,
|
7
7
|
EmbedSource,
|
8
|
+
ErrorHandlingStrategy,
|
9
|
+
FallbackStrategy,
|
8
10
|
GenerationStrategy,
|
9
11
|
GenerationConfig,
|
12
|
+
LlmProcessing,
|
10
13
|
Model,
|
11
14
|
OCRResult,
|
12
15
|
OcrStrategy,
|
@@ -29,8 +32,11 @@ __all__ = [
|
|
29
32
|
"Configuration",
|
30
33
|
"CroppingStrategy",
|
31
34
|
"EmbedSource",
|
35
|
+
"ErrorHandlingStrategy",
|
36
|
+
"FallbackStrategy",
|
32
37
|
"GenerationConfig",
|
33
38
|
"GenerationStrategy",
|
39
|
+
"LlmProcessing",
|
34
40
|
"Model",
|
35
41
|
"OCRResult",
|
36
42
|
"OcrStrategy",
|
{chunkr_ai-0.0.44.dist-info → chunkr_ai-0.0.46.dist-info}/RECORD
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
-
chunkr_ai/models.py,sha256=
|
2
|
+
chunkr_ai/models.py,sha256=L0L9CjY8SgSh9_Fzvo_nJXqKf_2urZHngMWtBVlAQAo,1006
|
3
3
|
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
5
5
|
chunkr_ai/api/chunkr.py,sha256=BzwcKNCuLfVR-HzgY8tKStsW4pIDVVjBgnEqPLyUUMM,3292
|
6
6
|
chunkr_ai/api/chunkr_base.py,sha256=FDl0Ew8eOY4hur5FFqPENZiq9YQy0G3XWEqcKPeCO-U,6130
|
7
|
-
chunkr_ai/api/configuration.py,sha256=
|
7
|
+
chunkr_ai/api/configuration.py,sha256=aCYi_NjuTDynDc6g_N94jVGTb8SQQaUQ4LM8_a5v29g,9882
|
8
8
|
chunkr_ai/api/decorators.py,sha256=VJX4qGBIL00K2zY8bh5KAMWv7SltJ38TvPJH06FnFss,4415
|
9
9
|
chunkr_ai/api/misc.py,sha256=QN-2YWQ8e3VvvK63Ua-e8jsx6gxVxkO88Z96yWOofu0,3653
|
10
10
|
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
11
|
chunkr_ai/api/task_response.py,sha256=6kk9g2f7OZB3PAsmp4Or5A42r1dXTAzWAHEIVtLQ9sA,6545
|
12
|
-
chunkr_ai-0.0.
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
12
|
+
chunkr_ai-0.0.46.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.0.46.dist-info/METADATA,sha256=Zjo2enHVCP5x0QqMTcS0k20nAWKogUoL88LZEVFoMZ8,7053
|
14
|
+
chunkr_ai-0.0.46.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
15
|
+
chunkr_ai-0.0.46.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.0.46.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|