abstractcore 2.4.4__py3-none-any.whl → 2.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/cli/__init__.py +9 -0
- abstractcore/cli/main.py +759 -0
- abstractcore/cli/vision_config.py +491 -0
- abstractcore/core/interface.py +7 -0
- abstractcore/core/session.py +27 -2
- abstractcore/media/handlers/__init__.py +16 -0
- abstractcore/media/handlers/anthropic_handler.py +326 -0
- abstractcore/media/handlers/local_handler.py +541 -0
- abstractcore/media/handlers/openai_handler.py +281 -0
- abstractcore/media/processors/__init__.py +13 -0
- abstractcore/media/processors/image_processor.py +610 -0
- abstractcore/media/processors/office_processor.py +490 -0
- abstractcore/media/processors/pdf_processor.py +485 -0
- abstractcore/media/processors/text_processor.py +557 -0
- abstractcore/media/utils/__init__.py +22 -0
- abstractcore/media/utils/image_scaler.py +306 -0
- abstractcore/providers/anthropic_provider.py +14 -2
- abstractcore/providers/base.py +24 -0
- abstractcore/providers/huggingface_provider.py +23 -9
- abstractcore/providers/lmstudio_provider.py +6 -1
- abstractcore/providers/mlx_provider.py +20 -7
- abstractcore/providers/ollama_provider.py +6 -1
- abstractcore/providers/openai_provider.py +6 -2
- abstractcore/tools/common_tools.py +651 -1
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/METADATA +59 -9
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/RECORD +31 -17
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/entry_points.txt +2 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/top_level.txt +0 -0
abstractcore/media/utils/image_scaler.py
ADDED
@@ -0,0 +1,306 @@
+"""
+Image scaling utility for AbstractCore media handling.
+
+Provides intelligent image scaling based on model-specific requirements
+and capabilities for vision models.
+"""
+
+from typing import Tuple, Optional, Union, Dict, Any
+from enum import Enum
+from pathlib import Path
+import logging
+
+try:
+    from PIL import Image, ImageOps
+    PIL_AVAILABLE = True
+except ImportError:
+    PIL_AVAILABLE = False
+
+from ..base import MediaProcessingError
+
+
+class ScalingMode(Enum):
+    """Image scaling modes."""
+    FIT = "fit"                  # Scale to fit within target size, maintaining aspect ratio
+    FILL = "fill"                # Scale to fill target size, may crop, maintaining aspect ratio
+    STRETCH = "stretch"          # Stretch to exact target size, may distort aspect ratio
+    PAD = "pad"                  # Scale to fit and pad with background to exact target size
+    CROP_CENTER = "crop_center"  # Scale to fill and crop from center
+
+
+class ModelOptimizedScaler:
+    """
+    Intelligent image scaler that optimizes images for specific vision models.
+
+    Uses model capability information to determine optimal scaling strategies.
+    """
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+
+        if not PIL_AVAILABLE:
+            raise MediaProcessingError("PIL (Pillow) is required for image scaling")
+
+    def get_optimal_resolution(self, model_name: str, original_size: Tuple[int, int],
+                               model_capabilities: Optional[Dict[str, Any]] = None) -> Tuple[int, int]:
+        """
+        Get optimal resolution for a specific model.
+
+        Args:
+            model_name: Name of the model
+            original_size: Original image size (width, height)
+            model_capabilities: Model capability information
+
+        Returns:
+            Optimal target size (width, height)
+        """
+        if model_capabilities is None:
+            model_capabilities = self._get_model_capabilities(model_name)
+
+        max_resolution = model_capabilities.get("max_image_resolution", "variable")
+        image_patch_size = model_capabilities.get("image_patch_size", 16)
+        adaptive_windowing = model_capabilities.get("adaptive_windowing", False)
+
+        # Handle different resolution strategies
+        if max_resolution == "variable":
+            return self._optimize_variable_resolution(original_size, image_patch_size)
+        elif max_resolution == "896x896":
+            return (896, 896)
+        elif max_resolution == "3584x3584":
+            return self._optimize_large_resolution(original_size, (3584, 3584), image_patch_size)
+        elif "x" in str(max_resolution):
+            # Parse specific resolution like "1568x1568"
+            w, h = map(int, str(max_resolution).split("x"))
+            return (w, h)
+        else:
+            # Default fallback
+            return self._optimize_variable_resolution(original_size, image_patch_size)
+
+    def _optimize_variable_resolution(self, original_size: Tuple[int, int],
+                                      patch_size: int = 16) -> Tuple[int, int]:
+        """Optimize for variable resolution models like Qwen3-VL."""
+        width, height = original_size
+
+        # For variable resolution, aim for a reasonable size that's efficient
+        # while maintaining good quality
+        max_dimension = 1024  # Good balance for most use cases
+
+        # Scale down if too large
+        if max(width, height) > max_dimension:
+            if width > height:
+                new_width = max_dimension
+                new_height = int(height * (max_dimension / width))
+            else:
+                new_height = max_dimension
+                new_width = int(width * (max_dimension / height))
+        else:
+            new_width, new_height = width, height
+
+        # Round to nearest patch size multiple for efficiency
+        new_width = ((new_width + patch_size - 1) // patch_size) * patch_size
+        new_height = ((new_height + patch_size - 1) // patch_size) * patch_size
+
+        return (new_width, new_height)
+
+    def _optimize_large_resolution(self, original_size: Tuple[int, int],
+                                   max_size: Tuple[int, int],
+                                   patch_size: int = 14) -> Tuple[int, int]:
+        """Optimize for large resolution models like Qwen2.5-VL."""
+        width, height = original_size
+        max_width, max_height = max_size
+
+        # Scale to fit within max size while maintaining aspect ratio
+        scale = min(max_width / width, max_height / height)
+
+        if scale < 1:  # Only scale down, never up
+            new_width = int(width * scale)
+            new_height = int(height * scale)
+        else:
+            new_width, new_height = width, height
+
+        # Round to nearest patch size multiple
+        new_width = ((new_width + patch_size - 1) // patch_size) * patch_size
+        new_height = ((new_height + patch_size - 1) // patch_size) * patch_size
+
+        return (new_width, new_height)
+
+    def scale_image(self, image: Image.Image, target_size: Tuple[int, int],
+                    mode: ScalingMode = ScalingMode.FIT,
+                    background_color: Tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
+        """
+        Scale image to target size using specified mode.
+
+        Args:
+            image: PIL Image to scale
+            target_size: Target size (width, height)
+            mode: Scaling mode
+            background_color: Background color for padding (RGB)
+
+        Returns:
+            Scaled PIL Image
+        """
+        target_width, target_height = target_size
+
+        if mode == ScalingMode.FIT:
+            # Scale to fit within target size, maintaining aspect ratio
+            image.thumbnail((target_width, target_height), Image.Resampling.LANCZOS)
+            return image
+
+        elif mode == ScalingMode.FILL:
+            # Scale to fill target size, may crop
+            return ImageOps.fit(image, target_size, Image.Resampling.LANCZOS)
+
+        elif mode == ScalingMode.STRETCH:
+            # Stretch to exact target size
+            return image.resize(target_size, Image.Resampling.LANCZOS)
+
+        elif mode == ScalingMode.PAD:
+            # Scale to fit and pad to exact size
+            image.thumbnail((target_width, target_height), Image.Resampling.LANCZOS)
+
+            # Create new image with background color
+            new_image = Image.new('RGB', target_size, background_color)
+
+            # Paste scaled image centered
+            paste_x = (target_width - image.width) // 2
+            paste_y = (target_height - image.height) // 2
+            new_image.paste(image, (paste_x, paste_y))
+
+            return new_image
+
+        elif mode == ScalingMode.CROP_CENTER:
+            # Scale to fill and crop from center
+            return ImageOps.fit(image, target_size, Image.Resampling.LANCZOS, centering=(0.5, 0.5))
+
+        else:
+            raise MediaProcessingError(f"Unknown scaling mode: {mode}")
+
+    def scale_for_model(self, image: Image.Image, model_name: str,
+                        scaling_mode: ScalingMode = ScalingMode.FIT,
+                        model_capabilities: Optional[Dict[str, Any]] = None) -> Image.Image:
+        """
+        Scale image optimally for a specific model.
+
+        Args:
+            image: PIL Image to scale
+            model_name: Name of the target model
+            scaling_mode: How to scale the image
+            model_capabilities: Model capability information
+
+        Returns:
+            Optimally scaled PIL Image for the model
+        """
+        original_size = image.size
+        target_size = self.get_optimal_resolution(model_name, original_size, model_capabilities)
+
+        self.logger.debug(f"Scaling image for {model_name}: {original_size} -> {target_size}")
+
+        # For fixed resolution models, always use PAD mode to maintain exact size
+        if model_capabilities and model_capabilities.get("max_image_resolution") == "896x896":
+            scaling_mode = ScalingMode.PAD
+
+        return self.scale_image(image, target_size, scaling_mode)
+
+    def _get_model_capabilities(self, model_name: str) -> Dict[str, Any]:
+        """
+        Get model capabilities from the capabilities JSON.
+
+        Args:
+            model_name: Name of the model
+
+        Returns:
+            Model capabilities dictionary
+        """
+        try:
+            from ..capabilities import get_media_capabilities
+            return get_media_capabilities(model_name).__dict__
+        except ImportError:
+            # Fallback capability detection
+            return self._fallback_model_capabilities(model_name)
+
+    def _fallback_model_capabilities(self, model_name: str) -> Dict[str, Any]:
+        """Fallback capability detection when capabilities module not available."""
+        model_lower = model_name.lower()
+
+        # Gemma models - fixed 896x896
+        if any(gem in model_lower for gem in ["gemma3", "gemma-3n"]):
+            return {
+                "max_image_resolution": "896x896",
+                "image_patch_size": 16,
+                "adaptive_windowing": True
+            }
+
+        # Qwen2.5-VL models - up to 3584x3584
+        elif "qwen2.5" in model_lower and "vl" in model_lower:
+            return {
+                "max_image_resolution": "3584x3584",
+                "image_patch_size": 14,
+                "pixel_grouping": "28x28"
+            }
+
+        # Qwen3-VL models - variable resolution
+        elif "qwen3" in model_lower and "vl" in model_lower:
+            return {
+                "max_image_resolution": "variable",
+                "image_patch_size": 16,
+                "pixel_grouping": "32x32"
+            }
+
+        # Claude models - up to 1568x1568
+        elif "claude" in model_lower:
+            return {
+                "max_image_resolution": "1568x1568",
+                "image_patch_size": 14
+            }
+
+        # Default fallback
+        else:
+            return {
+                "max_image_resolution": "variable",
+                "image_patch_size": 16
+            }
+
+
+# Convenience functions for easy usage
+_scaler_instance = None
+
+def get_scaler() -> ModelOptimizedScaler:
+    """Get shared scaler instance."""
+    global _scaler_instance
+    if _scaler_instance is None:
+        _scaler_instance = ModelOptimizedScaler()
+    return _scaler_instance
+
+def scale_image_for_model(image: Union[Image.Image, str, Path],
+                          model_name: str,
+                          scaling_mode: ScalingMode = ScalingMode.FIT) -> Image.Image:
+    """
+    Convenience function to scale an image for a specific model.
+
+    Args:
+        image: PIL Image, or path to image file
+        model_name: Name of the target model
+        scaling_mode: How to scale the image
+
+    Returns:
+        Optimally scaled PIL Image
+    """
+    if isinstance(image, (str, Path)):
+        image = Image.open(image)
+
+    scaler = get_scaler()
+    return scaler.scale_for_model(image, model_name, scaling_mode)
+
+def get_optimal_size_for_model(model_name: str, original_size: Tuple[int, int]) -> Tuple[int, int]:
+    """
+    Get optimal image size for a specific model.
+
+    Args:
+        model_name: Name of the target model
+        original_size: Original image size (width, height)
+
+    Returns:
+        Optimal target size (width, height)
+    """
+    scaler = get_scaler()
+    return scaler.get_optimal_resolution(model_name, original_size)
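As an illustration (not part of the diff), a minimal usage sketch of the new scaler. The import path follows the module layout in the file list above, and the image path and model names are placeholders:

    # Hypothetical usage of the new image_scaler utilities; import path, file
    # name, and model names are assumptions for illustration only.
    from abstractcore.media.utils.image_scaler import (
        ScalingMode,
        get_optimal_size_for_model,
        scale_image_for_model,
    )

    # Query the target resolution without loading the image.
    print(get_optimal_size_for_model("qwen2.5-vl-7b-instruct", (4032, 3024)))

    # Scale directly from a file path; fixed-resolution models such as Gemma 3
    # are padded to 896x896 by scale_for_model.
    scaled = scale_image_for_model("photo.jpg", "gemma3-4b", ScalingMode.FIT)
    scaled.save("photo_scaled.png")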
abstractcore/providers/anthropic_provider.py
CHANGED
@@ -47,8 +47,7 @@ class AnthropicProvider(BaseProvider):
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)

-        # Store configuration
-        self.temperature = kwargs.get("temperature", 0.7)
+        # Store provider-specific configuration
         self.top_p = kwargs.get("top_p", 1.0)
         self.top_k = kwargs.get("top_k", None)

@@ -132,6 +131,19 @@ class AnthropicProvider(BaseProvider):
         if kwargs.get("top_k") or self.top_k:
             call_params["top_k"] = kwargs.get("top_k", self.top_k)

+        # Handle seed parameter (Anthropic doesn't support seed natively)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            import warnings
+            warnings.warn(
+                f"Seed parameter ({seed_value}) is not supported by Anthropic Claude API. "
+                f"For deterministic outputs, use temperature=0.0 which may provide more consistent results, "
+                f"though true determinism is not guaranteed.",
+                UserWarning,
+                stacklevel=3
+            )
+            self.logger.warning(f"Seed {seed_value} requested but not supported by Anthropic API")
+
         # Handle structured output using the "tool trick"
         structured_tool_name = None
         if response_model and PYDANTIC_AVAILABLE:
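Because the Anthropic API has no native seed support, passing a seed now emits a UserWarning instead of failing. For illustration only, a caller could escalate or silence that warning with the standard-library filters (the message regex is taken from the warning text above):

    import warnings

    # Fail fast in tests when a seed would be silently ignored by the Anthropic provider...
    warnings.filterwarnings("error", message=r"Seed parameter .* is not supported by Anthropic")

    # ...or suppress the warning in code that deliberately passes a shared seed everywhere.
    warnings.filterwarnings("ignore", message=r"Seed parameter .* is not supported by Anthropic")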
abstractcore/providers/base.py
CHANGED
@@ -570,8 +570,32 @@ class BaseProvider(AbstractCoreInterface, ABC):
         result_kwargs = kwargs.copy()
         result_kwargs["max_output_tokens"] = effective_max_output

+        # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
+        result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
+        if self.seed is not None:
+            result_kwargs["seed"] = result_kwargs.get("seed", self.seed)
+
         return result_kwargs

+    def _extract_generation_params(self, **kwargs) -> Dict[str, Any]:
+        """
+        Extract generation parameters with consistent fallback hierarchy.
+
+        Returns:
+            Dict containing temperature, seed, and other generation parameters
+        """
+        params = {}
+
+        # Temperature (always present)
+        params["temperature"] = kwargs.get("temperature", self.temperature)
+
+        # Seed (only if not None)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            params["seed"] = seed_value
+
+        return params
+
     def _get_provider_max_tokens_param(self, kwargs: Dict[str, Any]) -> int:
         """
         Extract the appropriate max tokens parameter for this provider.
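The new base-class logic applies a per-call kwargs → instance attribute → default fallback for temperature and seed. A self-contained sketch of that hierarchy (a simplified stand-in, not the real BaseProvider):

    from typing import Any, Dict, Optional

    class _ProviderConfigSketch:
        def __init__(self, temperature: float = 0.7, seed: Optional[int] = None):
            self.temperature = temperature  # instance-level setting
            self.seed = seed                # None means "not configured"

        def extract_generation_params(self, **kwargs) -> Dict[str, Any]:
            params = {"temperature": kwargs.get("temperature", self.temperature)}
            seed = kwargs.get("seed", self.seed)
            if seed is not None:            # seed is only forwarded when set somewhere
                params["seed"] = seed
            return params

    cfg = _ProviderConfigSketch(temperature=0.2, seed=123)
    print(cfg.extract_generation_params())                      # instance values win over defaults
    print(cfg.extract_generation_params(temperature=0.9))       # per-call kwargs win over instance
    print(_ProviderConfigSketch().extract_generation_params())  # no 'seed' key when unset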
abstractcore/providers/huggingface_provider.py
CHANGED
@@ -68,6 +68,7 @@ class HuggingFaceProvider(BaseProvider):
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)

+        # Store provider-specific configuration
         self.n_gpu_layers = n_gpu_layers
         self.model_type = None  # Will be "transformers" or "gguf"
         self.device = device
@@ -537,14 +538,15 @@ class HuggingFaceProvider(BaseProvider):
         # Generation parameters using unified system
         generation_kwargs = self._prepare_generation_kwargs(**kwargs)
         max_new_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-        temperature = kwargs.get("temperature",
+        temperature = kwargs.get("temperature", self.temperature)
         top_p = kwargs.get("top_p", 0.9)
+        seed_value = kwargs.get("seed", self.seed)

         try:
             if stream:
-                return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+                return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
             else:
-                response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+                response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed_value)

             # Handle tool execution for prompted models
             if tools and self.tool_handler.supports_prompted and response.content:
@@ -651,11 +653,16 @@ class HuggingFaceProvider(BaseProvider):
         generation_kwargs = {
             "messages": chat_messages,
             "max_tokens": max_output_tokens,  # This is max_output_tokens for llama-cpp
-            "temperature": kwargs.get("temperature",
+            "temperature": kwargs.get("temperature", self.temperature),
             "top_p": kwargs.get("top_p", 0.9),
             "stream": stream
         }

+        # Add seed if provided (GGUF/llama-cpp supports seed)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            generation_kwargs["seed"] = seed_value
+
         # Handle tools - both native and prompted support
         has_native_tools = False
         if tools:
@@ -846,9 +853,16 @@ class HuggingFaceProvider(BaseProvider):
         )

     def _single_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                      temperature: float, top_p: float) -> GenerateResponse:
+                                      temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
         """Generate single response using transformers (original implementation)"""
         try:
+            # Set seed for deterministic generation if provided
+            if seed is not None:
+                import torch
+                torch.manual_seed(seed)
+                if torch.cuda.is_available():
+                    torch.cuda.manual_seed_all(seed)
+
             outputs = self.pipeline(
                 input_text,
                 max_new_tokens=max_new_tokens,
@@ -902,11 +916,11 @@ class HuggingFaceProvider(BaseProvider):
         }

     def _stream_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                      temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                      temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
         """Stream response using transformers (simulated, original implementation) with tool tag rewriting support"""
         try:
             # HuggingFace doesn't have native streaming, so we simulate it
-            full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+            full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed)

             if full_response.content:
                 # Apply tool tag rewriting if enabled
@@ -1039,12 +1053,12 @@ class HuggingFaceProvider(BaseProvider):
     def _stream_generate_transformers_with_tools(self, input_text: str, max_new_tokens: int,
                                                  temperature: float, top_p: float,
                                                  tools: Optional[List[Dict[str, Any]]] = None,
-                                                 tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                                 tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
         """Stream generate with tool execution at the end"""
         collected_content = ""

         # Stream the response content
-        for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags):
+        for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags, seed):
             collected_content += chunk.content
             yield chunk

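The transformers path now seeds torch before generating when a seed is supplied. A standalone sketch of that pattern (torch APIs only; it pins torch's RNG but not every source of nondeterminism, e.g. some CUDA kernels):

    import torch

    def seed_torch(seed: int) -> None:
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

    seed_torch(42)
    print(torch.rand(2))  # identical values on every run with the same seed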
abstractcore/providers/lmstudio_provider.py
CHANGED
@@ -196,11 +196,16 @@ class LMStudioProvider(BaseProvider):
             "model": self.model,
             "messages": chat_messages,
             "stream": stream,
-            "temperature": kwargs.get("temperature",
+            "temperature": kwargs.get("temperature", self.temperature),
             "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
             "top_p": kwargs.get("top_p", 0.9),
         }

+        # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["seed"] = seed_value
+
         if stream:
             # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
             return self._stream_generate(payload)
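For reference, the resulting OpenAI-compatible request can be reproduced directly; the localhost:1234 endpoint and model name are assumptions about a default local LM Studio server, not part of the diff:

    import requests

    payload = {
        "model": "qwen2.5-7b-instruct",
        "messages": [{"role": "user", "content": "Say hi"}],
        "stream": False,
        "temperature": 0.7,
        "max_tokens": 64,
        "top_p": 0.9,
        "seed": 42,  # included only when a seed was provided
    }
    r = requests.post("http://localhost:1234/v1/chat/completions", json=payload, timeout=60)
    print(r.json()["choices"][0]["message"]["content"])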
abstractcore/providers/mlx_provider.py
CHANGED
@@ -189,14 +189,15 @@ class MLXProvider(BaseProvider):
         # MLX generation parameters using unified system
         generation_kwargs = self._prepare_generation_kwargs(**kwargs)
         max_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-        temperature = kwargs.get("temperature",
+        temperature = kwargs.get("temperature", self.temperature)
         top_p = kwargs.get("top_p", 0.9)
+        seed_value = kwargs.get("seed", self.seed)

         try:
             if stream:
-                return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+                return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
             else:
-                response = self._single_generate(full_prompt, max_tokens, temperature, top_p)
+                response = self._single_generate(full_prompt, max_tokens, temperature, top_p, seed_value)

             # Handle tool execution for prompted models
             if tools and self.tool_handler.supports_prompted and response.content:
@@ -256,9 +257,15 @@ class MLXProvider(BaseProvider):

         return full_prompt

-    def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float) -> GenerateResponse:
+    def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
         """Generate single response"""

+        # Handle seed parameter (MLX supports seed via mx.random.seed)
+        if seed is not None:
+            import mlx.core as mx
+            mx.random.seed(seed)
+            self.logger.debug(f"Set MLX random seed to {seed} for deterministic generation")
+
         # Try different MLX API signatures
         try:
             # Try new mlx-lm API
@@ -305,9 +312,15 @@ class MLXProvider(BaseProvider):
             "total_tokens": total_tokens
         }

-    def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+    def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
         """Generate real streaming response using MLX stream_generate with tool tag rewriting support"""
         try:
+            # Handle seed parameter (MLX supports seed via mx.random.seed)
+            if seed is not None:
+                import mlx.core as mx
+                mx.random.seed(seed)
+                self.logger.debug(f"Set MLX random seed to {seed} for deterministic streaming generation")
+
             # Initialize tool tag rewriter if needed
             rewriter = None
             buffer = ""
@@ -366,12 +379,12 @@ class MLXProvider(BaseProvider):
     def _stream_generate_with_tools(self, full_prompt: str, max_tokens: int,
                                     temperature: float, top_p: float,
                                     tools: Optional[List[Dict[str, Any]]] = None,
-                                    tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                    tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
         """Stream generate with tool execution at the end"""
         collected_content = ""

         # Stream the response content
-        for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags):
+        for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed):
             collected_content += chunk.content
             yield chunk

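The MLX provider now calls mx.random.seed before both single-shot and streaming generation. A minimal, self-contained check of what that buys (requires the mlx package on Apple Silicon):

    import mlx.core as mx

    mx.random.seed(7)
    a = mx.random.uniform(shape=(3,))
    mx.random.seed(7)
    b = mx.random.uniform(shape=(3,))
    print(mx.allclose(a, b))  # True: reseeding reproduces the same draws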
abstractcore/providers/ollama_provider.py
CHANGED
@@ -132,11 +132,16 @@ class OllamaProvider(BaseProvider):
             "model": self.model,
             "stream": stream,
             "options": {
-                "temperature": kwargs.get("temperature",
+                "temperature": kwargs.get("temperature", self.temperature),
                 "num_predict": max_output_tokens,  # Ollama uses num_predict for max output tokens
             }
         }

+        # Add seed if provided (Ollama supports seed for deterministic outputs)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["options"]["seed"] = seed_value
+
         # Add structured output support (Ollama native JSON schema)
         if response_model and PYDANTIC_AVAILABLE:
            json_schema = response_model.model_json_schema()
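The Ollama payload now carries the seed inside the options object. An equivalent raw request for comparison (localhost:11434 is Ollama's usual default port, and the model name is a placeholder):

    import requests

    payload = {
        "model": "llama3.1",
        "stream": False,
        "messages": [{"role": "user", "content": "Pick a number"}],
        "options": {
            "temperature": 0.0,
            "num_predict": 32,
            "seed": 42,  # added only when a seed is supplied
        },
    }
    r = requests.post("http://localhost:11434/api/chat", json=payload, timeout=60)
    print(r.json()["message"]["content"])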
abstractcore/providers/openai_provider.py
CHANGED
@@ -50,8 +50,7 @@ class OpenAIProvider(BaseProvider):
         # Preflight check: validate model exists
         self._validate_model_exists()

-        # Store configuration
-        self.temperature = kwargs.get("temperature", 0.7)
+        # Store provider-specific configuration
         self.top_p = kwargs.get("top_p", 1.0)
         self.frequency_penalty = kwargs.get("frequency_penalty", 0.0)
         self.presence_penalty = kwargs.get("presence_penalty", 0.0)
@@ -125,6 +124,11 @@ class OpenAIProvider(BaseProvider):
         call_params["top_p"] = kwargs.get("top_p", self.top_p)
         call_params["frequency_penalty"] = kwargs.get("frequency_penalty", self.frequency_penalty)
         call_params["presence_penalty"] = kwargs.get("presence_penalty", self.presence_penalty)
+
+        # Add seed if provided (OpenAI supports seed for deterministic outputs)
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            call_params["seed"] = seed_value

         # Handle different token parameter names for different model families
         if self._uses_max_completion_tokens():