chunkr-ai 0.0.45__py3-none-any.whl → 0.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/configuration.py +98 -9
- chunkr_ai/models.py +4 -0
- {chunkr_ai-0.0.45.dist-info → chunkr_ai-0.0.46.dist-info}/METADATA +1 -1
- {chunkr_ai-0.0.45.dist-info → chunkr_ai-0.0.46.dist-info}/RECORD +7 -7
- {chunkr_ai-0.0.45.dist-info → chunkr_ai-0.0.46.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.45.dist-info → chunkr_ai-0.0.46.dist-info}/licenses/LICENSE +0 -0
- {chunkr_ai-0.0.45.dist-info → chunkr_ai-0.0.46.dist-info}/top_level.txt +0 -0
chunkr_ai/api/configuration.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from pydantic import BaseModel, Field, ConfigDict
|
2
2
|
from enum import Enum
|
3
3
|
from typing import Any, List, Optional, Union
|
4
|
-
from pydantic import field_validator
|
4
|
+
from pydantic import field_validator, field_serializer
|
5
5
|
|
6
6
|
class GenerationStrategy(str, Enum):
|
7
7
|
LLM = "LLM"
|
@@ -65,11 +65,7 @@ class TokenizerType(BaseModel):
|
|
65
65
|
return f"string:{self.string_value}"
|
66
66
|
return ""
|
67
67
|
|
68
|
-
model_config = ConfigDict(
|
69
|
-
json_encoders={
|
70
|
-
'TokenizerType': lambda v: v.model_dump()
|
71
|
-
}
|
72
|
-
)
|
68
|
+
model_config = ConfigDict()
|
73
69
|
|
74
70
|
def model_dump(self, **kwargs):
|
75
71
|
if self.enum_value is not None:
|
@@ -85,10 +81,13 @@ class ChunkProcessing(BaseModel):
|
|
85
81
|
|
86
82
|
model_config = ConfigDict(
|
87
83
|
arbitrary_types_allowed=True,
|
88
|
-
json_encoders={
|
89
|
-
TokenizerType: lambda v: v.model_dump()
|
90
|
-
}
|
91
84
|
)
|
85
|
+
|
86
|
+
@field_serializer('tokenizer')
def serialize_tokenizer(self, tokenizer: Optional[TokenizerType], _info):
    """Serialize the ``tokenizer`` field through ``TokenizerType.model_dump``.

    An unset tokenizer (``None``) is passed through unchanged; otherwise
    the value is whatever the tokenizer's own ``model_dump()`` returns.
    """
    return None if tokenizer is None else tokenizer.model_dump()
|
92
91
|
|
93
92
|
@field_validator('tokenizer', mode='before')
|
94
93
|
def validate_tokenizer(cls, v):
|
@@ -130,6 +129,95 @@ class ErrorHandlingStrategy(str, Enum):
|
|
130
129
|
FAIL = "Fail"
|
131
130
|
CONTINUE = "Continue"
|
132
131
|
|
132
|
+
class FallbackStrategy(BaseModel):
    """Fallback selection used by ``LlmProcessing.fallback_strategy``.

    ``type`` must be one of ``"None"``, ``"Default"`` or ``"Model"``;
    ``model_id`` is populated by the ``model()`` constructor.

    The serialized form produced by ``model_dump`` (and accepted back by
    ``model_validate``) is compact rather than field-based:
    ``"None"``, ``"Default"`` or ``{"Model": "<model-id>"}``.
    NOTE(review): what each strategy means to the service is not visible
    in this file -- confirm against the API documentation.
    """

    model_config = ConfigDict()

    type: str
    model_id: Optional[str] = None

    @classmethod
    def none(cls) -> "FallbackStrategy":
        """Build the strategy whose ``type`` is ``"None"``."""
        return cls(type="None")

    @classmethod
    def default(cls) -> "FallbackStrategy":
        """Build the strategy whose ``type`` is ``"Default"``."""
        return cls(type="Default")

    @classmethod
    def model(cls, model_id: str) -> "FallbackStrategy":
        """Build a ``"Model"`` strategy that carries ``model_id``."""
        return cls(type="Model", model_id=model_id)

    def __str__(self) -> str:
        """Return ``Model(<model_id>)`` for model strategies, else ``type``."""
        if self.type == "Model":
            return f"Model({self.model_id})"
        return self.type

    def model_dump(self, **kwargs) -> Union[str, dict]:
        """Return the compact serialized form.

        Deliberately overrides pydantic's field-based dump: ``kwargs`` are
        accepted for signature compatibility but are not used, and the
        result is either the bare ``type`` string or ``{"Model": model_id}``.
        """
        if self.type == "Model":
            return {"Model": self.model_id}
        return self.type

    @field_validator('type')
    @classmethod
    def validate_type(cls, v):
        """Reject any ``type`` outside the three supported values.

        Raises:
            ValueError: if ``v`` is not "None", "Default" or "Model".
        """
        if v not in ("None", "Default", "Model"):
            raise ValueError(f"Invalid fallback strategy: {v}")
        return v

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """Validate ``obj``, additionally accepting the compact form.

        Args:
            obj: a type string ("None", "Default", "Model"), a single-key
                dict ``{"Model": "<model-id>"}``, or anything pydantic's
                regular ``model_validate`` accepts.
            **kwargs: forwarded to ``BaseModel.model_validate`` (for example
                ``strict``, ``from_attributes``, ``context``) when ``obj``
                is not in the compact form.

        Returns:
            A ``FallbackStrategy`` instance.
        """
        # Bare type string. Unknown strings are intentionally left to the
        # regular validation below, which raises pydantic's ValidationError.
        if isinstance(obj, str):
            if obj in ("None", "Default", "Model"):
                return cls(type=obj)

        # Single-key dictionary form like {"Model": "model-id"}.
        elif isinstance(obj, dict) and len(obj) == 1:
            if "Model" in obj:
                return cls(type="Model", model_id=obj["Model"])

        # Fall back to normal validation, keeping the caller's options.
        return super().model_validate(obj, **kwargs)
|
185
|
+
|
186
|
+
class LlmProcessing(BaseModel):
    """LLM processing options (exposed as ``Configuration.llm_processing``).

    NOTE(review): the exact meaning of ``model_id``,
    ``max_completion_tokens`` and ``temperature`` is defined by the
    service, not by this file -- confirm against the API documentation.
    """

    model_config = ConfigDict()

    model_id: Optional[str] = None
    # Defaults to FallbackStrategy.default() when not supplied.
    fallback_strategy: FallbackStrategy = Field(default_factory=FallbackStrategy.default)
    max_completion_tokens: Optional[int] = None
    temperature: float = 0.0

    @field_serializer('fallback_strategy')
    def serialize_fallback_strategy(self, fallback_strategy: FallbackStrategy, _info):
        """Serialize via ``FallbackStrategy.model_dump`` (compact form)."""
        return fallback_strategy.model_dump()

    @field_validator('fallback_strategy', mode='before')
    @classmethod
    def validate_fallback_strategy(cls, v):
        """Coerce compact inputs into a ``FallbackStrategy`` before validation.

        Accepted shorthand:
          * ``"None"`` / ``"Default"``            -> the matching strategy
          * any other string                      -> treated as a model id
          * ``{"Model": "<id>"}``                 -> model strategy
          * ``{"None": ...}`` / ``{"Default": ...}`` -> the matching strategy

        Anything else (including ``FallbackStrategy`` instances and
        field-style dicts such as ``{"type": "Default"}``) is returned
        unchanged so that normal pydantic validation handles it.
        """
        if isinstance(v, str):
            if v == "None":
                return FallbackStrategy.none()
            if v == "Default":
                return FallbackStrategy.default()
            # Any other string is interpreted as a model id.
            return FallbackStrategy.model(v)

        # Single-key dictionary form like {"Model": "model-id"}.
        if isinstance(v, dict) and len(v) == 1:
            if "Model" in v:
                return FallbackStrategy.model(v["Model"])
            # Test key presence only: ``v.get(key) is None`` is also true
            # when the key is absent, which would send every unrelated
            # single-key dict to the "None" strategy.
            if "None" in v:
                return FallbackStrategy.none()
            if "Default" in v:
                return FallbackStrategy.default()

        return v
|
220
|
+
|
133
221
|
class BoundingBox(BaseModel):
|
134
222
|
left: float
|
135
223
|
top: float
|
@@ -199,6 +287,7 @@ class Configuration(BaseModel):
|
|
199
287
|
segment_processing: Optional[SegmentProcessing] = None
|
200
288
|
segmentation_strategy: Optional[SegmentationStrategy] = None
|
201
289
|
pipeline: Optional[Pipeline] = None
|
290
|
+
llm_processing: Optional[LlmProcessing] = None
|
202
291
|
|
203
292
|
class OutputConfiguration(Configuration):
|
204
293
|
input_file_url: Optional[str] = None
|
chunkr_ai/models.py
CHANGED
@@ -6,8 +6,10 @@ from .api.configuration import (
|
|
6
6
|
CroppingStrategy,
|
7
7
|
EmbedSource,
|
8
8
|
ErrorHandlingStrategy,
|
9
|
+
FallbackStrategy,
|
9
10
|
GenerationStrategy,
|
10
11
|
GenerationConfig,
|
12
|
+
LlmProcessing,
|
11
13
|
Model,
|
12
14
|
OCRResult,
|
13
15
|
OcrStrategy,
|
@@ -31,8 +33,10 @@ __all__ = [
|
|
31
33
|
"CroppingStrategy",
|
32
34
|
"EmbedSource",
|
33
35
|
"ErrorHandlingStrategy",
|
36
|
+
"FallbackStrategy",
|
34
37
|
"GenerationConfig",
|
35
38
|
"GenerationStrategy",
|
39
|
+
"LlmProcessing",
|
36
40
|
"Model",
|
37
41
|
"OCRResult",
|
38
42
|
"OcrStrategy",
|
@@ -1,16 +1,16 @@
|
|
1
1
|
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
-
chunkr_ai/models.py,sha256=
|
2
|
+
chunkr_ai/models.py,sha256=L0L9CjY8SgSh9_Fzvo_nJXqKf_2urZHngMWtBVlAQAo,1006
|
3
3
|
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
5
5
|
chunkr_ai/api/chunkr.py,sha256=BzwcKNCuLfVR-HzgY8tKStsW4pIDVVjBgnEqPLyUUMM,3292
|
6
6
|
chunkr_ai/api/chunkr_base.py,sha256=FDl0Ew8eOY4hur5FFqPENZiq9YQy0G3XWEqcKPeCO-U,6130
|
7
|
-
chunkr_ai/api/configuration.py,sha256=
|
7
|
+
chunkr_ai/api/configuration.py,sha256=aCYi_NjuTDynDc6g_N94jVGTb8SQQaUQ4LM8_a5v29g,9882
|
8
8
|
chunkr_ai/api/decorators.py,sha256=VJX4qGBIL00K2zY8bh5KAMWv7SltJ38TvPJH06FnFss,4415
|
9
9
|
chunkr_ai/api/misc.py,sha256=QN-2YWQ8e3VvvK63Ua-e8jsx6gxVxkO88Z96yWOofu0,3653
|
10
10
|
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
11
|
chunkr_ai/api/task_response.py,sha256=6kk9g2f7OZB3PAsmp4Or5A42r1dXTAzWAHEIVtLQ9sA,6545
|
12
|
-
chunkr_ai-0.0.
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
12
|
+
chunkr_ai-0.0.46.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.0.46.dist-info/METADATA,sha256=Zjo2enHVCP5x0QqMTcS0k20nAWKogUoL88LZEVFoMZ8,7053
|
14
|
+
chunkr_ai-0.0.46.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
15
|
+
chunkr_ai-0.0.46.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.0.46.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|