abstractcore 2.4.4__py3-none-any.whl → 2.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. abstractcore/cli/__init__.py +9 -0
  2. abstractcore/cli/main.py +759 -0
  3. abstractcore/cli/vision_config.py +491 -0
  4. abstractcore/core/interface.py +7 -0
  5. abstractcore/core/session.py +27 -2
  6. abstractcore/media/handlers/__init__.py +16 -0
  7. abstractcore/media/handlers/anthropic_handler.py +326 -0
  8. abstractcore/media/handlers/local_handler.py +541 -0
  9. abstractcore/media/handlers/openai_handler.py +281 -0
  10. abstractcore/media/processors/__init__.py +13 -0
  11. abstractcore/media/processors/image_processor.py +610 -0
  12. abstractcore/media/processors/office_processor.py +490 -0
  13. abstractcore/media/processors/pdf_processor.py +485 -0
  14. abstractcore/media/processors/text_processor.py +557 -0
  15. abstractcore/media/utils/__init__.py +22 -0
  16. abstractcore/media/utils/image_scaler.py +306 -0
  17. abstractcore/providers/anthropic_provider.py +14 -2
  18. abstractcore/providers/base.py +24 -0
  19. abstractcore/providers/huggingface_provider.py +23 -9
  20. abstractcore/providers/lmstudio_provider.py +6 -1
  21. abstractcore/providers/mlx_provider.py +20 -7
  22. abstractcore/providers/ollama_provider.py +6 -1
  23. abstractcore/providers/openai_provider.py +6 -2
  24. abstractcore/tools/common_tools.py +651 -1
  25. abstractcore/utils/version.py +1 -1
  26. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/METADATA +59 -9
  27. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/RECORD +31 -17
  28. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/entry_points.txt +2 -0
  29. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/WHEEL +0 -0
  30. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/licenses/LICENSE +0 -0
  31. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/top_level.txt +0 -0
abstractcore/media/utils/image_scaler.py
@@ -0,0 +1,306 @@
+ """
+ Image scaling utility for AbstractCore media handling.
+
+ Provides intelligent image scaling based on model-specific requirements
+ and capabilities for vision models.
+ """
+
+ from typing import Tuple, Optional, Union, Dict, Any
+ from enum import Enum
+ from pathlib import Path
+ import logging
+
+ try:
+     from PIL import Image, ImageOps
+     PIL_AVAILABLE = True
+ except ImportError:
+     PIL_AVAILABLE = False
+
+ from ..base import MediaProcessingError
+
+
+ class ScalingMode(Enum):
+     """Image scaling modes."""
+     FIT = "fit"                  # Scale to fit within target size, maintaining aspect ratio
+     FILL = "fill"                # Scale to fill target size, may crop, maintaining aspect ratio
+     STRETCH = "stretch"          # Stretch to exact target size, may distort aspect ratio
+     PAD = "pad"                  # Scale to fit and pad with background to exact target size
+     CROP_CENTER = "crop_center"  # Scale to fill and crop from center
+
+
+ class ModelOptimizedScaler:
+     """
+     Intelligent image scaler that optimizes images for specific vision models.
+
+     Uses model capability information to determine optimal scaling strategies.
+     """
+
+     def __init__(self):
+         self.logger = logging.getLogger(__name__)
+
+         if not PIL_AVAILABLE:
+             raise MediaProcessingError("PIL (Pillow) is required for image scaling")
+
+     def get_optimal_resolution(self, model_name: str, original_size: Tuple[int, int],
+                                model_capabilities: Optional[Dict[str, Any]] = None) -> Tuple[int, int]:
+         """
+         Get optimal resolution for a specific model.
+
+         Args:
+             model_name: Name of the model
+             original_size: Original image size (width, height)
+             model_capabilities: Model capability information
+
+         Returns:
+             Optimal target size (width, height)
+         """
+         if model_capabilities is None:
+             model_capabilities = self._get_model_capabilities(model_name)
+
+         max_resolution = model_capabilities.get("max_image_resolution", "variable")
+         image_patch_size = model_capabilities.get("image_patch_size", 16)
+         adaptive_windowing = model_capabilities.get("adaptive_windowing", False)
+
+         # Handle different resolution strategies
+         if max_resolution == "variable":
+             return self._optimize_variable_resolution(original_size, image_patch_size)
+         elif max_resolution == "896x896":
+             return (896, 896)
+         elif max_resolution == "3584x3584":
+             return self._optimize_large_resolution(original_size, (3584, 3584), image_patch_size)
+         elif "x" in str(max_resolution):
+             # Parse specific resolution like "1568x1568"
+             w, h = map(int, str(max_resolution).split("x"))
+             return (w, h)
+         else:
+             # Default fallback
+             return self._optimize_variable_resolution(original_size, image_patch_size)
+
+     def _optimize_variable_resolution(self, original_size: Tuple[int, int],
+                                       patch_size: int = 16) -> Tuple[int, int]:
+         """Optimize for variable resolution models like Qwen3-VL."""
+         width, height = original_size
+
+         # For variable resolution, aim for a reasonable size that's efficient
+         # while maintaining good quality
+         max_dimension = 1024  # Good balance for most use cases
+
+         # Scale down if too large
+         if max(width, height) > max_dimension:
+             if width > height:
+                 new_width = max_dimension
+                 new_height = int(height * (max_dimension / width))
+             else:
+                 new_height = max_dimension
+                 new_width = int(width * (max_dimension / height))
+         else:
+             new_width, new_height = width, height
+
+         # Round up to the next patch size multiple for efficiency
+         new_width = ((new_width + patch_size - 1) // patch_size) * patch_size
+         new_height = ((new_height + patch_size - 1) // patch_size) * patch_size
+
+         return (new_width, new_height)
+
+     def _optimize_large_resolution(self, original_size: Tuple[int, int],
+                                    max_size: Tuple[int, int],
+                                    patch_size: int = 14) -> Tuple[int, int]:
+         """Optimize for large resolution models like Qwen2.5-VL."""
+         width, height = original_size
+         max_width, max_height = max_size
+
+         # Scale to fit within max size while maintaining aspect ratio
+         scale = min(max_width / width, max_height / height)
+
+         if scale < 1:  # Only scale down, never up
+             new_width = int(width * scale)
+             new_height = int(height * scale)
+         else:
+             new_width, new_height = width, height
+
+         # Round up to the next patch size multiple
+         new_width = ((new_width + patch_size - 1) // patch_size) * patch_size
+         new_height = ((new_height + patch_size - 1) // patch_size) * patch_size
+
+         return (new_width, new_height)
+
+     def scale_image(self, image: Image.Image, target_size: Tuple[int, int],
+                     mode: ScalingMode = ScalingMode.FIT,
+                     background_color: Tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
+         """
+         Scale image to target size using specified mode.
+
+         Args:
+             image: PIL Image to scale
+             target_size: Target size (width, height)
+             mode: Scaling mode
+             background_color: Background color for padding (RGB)
+
+         Returns:
+             Scaled PIL Image
+         """
+         target_width, target_height = target_size
+
+         if mode == ScalingMode.FIT:
+             # Scale to fit within target size, maintaining aspect ratio
+             image.thumbnail((target_width, target_height), Image.Resampling.LANCZOS)
+             return image
+
+         elif mode == ScalingMode.FILL:
+             # Scale to fill target size, may crop
+             return ImageOps.fit(image, target_size, Image.Resampling.LANCZOS)
+
+         elif mode == ScalingMode.STRETCH:
+             # Stretch to exact target size
+             return image.resize(target_size, Image.Resampling.LANCZOS)
+
+         elif mode == ScalingMode.PAD:
+             # Scale to fit and pad to exact size
+             image.thumbnail((target_width, target_height), Image.Resampling.LANCZOS)
+
+             # Create new image with background color
+             new_image = Image.new('RGB', target_size, background_color)
+
+             # Paste scaled image centered
+             paste_x = (target_width - image.width) // 2
+             paste_y = (target_height - image.height) // 2
+             new_image.paste(image, (paste_x, paste_y))
+
+             return new_image
+
+         elif mode == ScalingMode.CROP_CENTER:
+             # Scale to fill and crop from center
+             return ImageOps.fit(image, target_size, Image.Resampling.LANCZOS, centering=(0.5, 0.5))
+
+         else:
+             raise MediaProcessingError(f"Unknown scaling mode: {mode}")
+
+     def scale_for_model(self, image: Image.Image, model_name: str,
+                         scaling_mode: ScalingMode = ScalingMode.FIT,
+                         model_capabilities: Optional[Dict[str, Any]] = None) -> Image.Image:
+         """
+         Scale image optimally for a specific model.
+
+         Args:
+             image: PIL Image to scale
+             model_name: Name of the target model
+             scaling_mode: How to scale the image
+             model_capabilities: Model capability information
+
+         Returns:
+             Optimally scaled PIL Image for the model
+         """
+         original_size = image.size
+         target_size = self.get_optimal_resolution(model_name, original_size, model_capabilities)
+
+         self.logger.debug(f"Scaling image for {model_name}: {original_size} -> {target_size}")
+
+         # For fixed resolution models, always use PAD mode to maintain exact size
+         if model_capabilities and model_capabilities.get("max_image_resolution") == "896x896":
+             scaling_mode = ScalingMode.PAD
+
+         return self.scale_image(image, target_size, scaling_mode)
+
+     def _get_model_capabilities(self, model_name: str) -> Dict[str, Any]:
+         """
+         Get model capabilities from the capabilities JSON.
+
+         Args:
+             model_name: Name of the model
+
+         Returns:
+             Model capabilities dictionary
+         """
+         try:
+             from ..capabilities import get_media_capabilities
+             return get_media_capabilities(model_name).__dict__
+         except ImportError:
+             # Fallback capability detection
+             return self._fallback_model_capabilities(model_name)
+
+     def _fallback_model_capabilities(self, model_name: str) -> Dict[str, Any]:
+         """Fallback capability detection when the capabilities module is not available."""
+         model_lower = model_name.lower()
+
+         # Gemma models - fixed 896x896
+         if any(gem in model_lower for gem in ["gemma3", "gemma-3n"]):
+             return {
+                 "max_image_resolution": "896x896",
+                 "image_patch_size": 16,
+                 "adaptive_windowing": True
+             }
+
+         # Qwen2.5-VL models - up to 3584x3584
+         elif "qwen2.5" in model_lower and "vl" in model_lower:
+             return {
+                 "max_image_resolution": "3584x3584",
+                 "image_patch_size": 14,
+                 "pixel_grouping": "28x28"
+             }
+
+         # Qwen3-VL models - variable resolution
+         elif "qwen3" in model_lower and "vl" in model_lower:
+             return {
+                 "max_image_resolution": "variable",
+                 "image_patch_size": 16,
+                 "pixel_grouping": "32x32"
+             }
+
+         # Claude models - up to 1568x1568
+         elif "claude" in model_lower:
+             return {
+                 "max_image_resolution": "1568x1568",
+                 "image_patch_size": 14
+             }
+
+         # Default fallback
+         else:
+             return {
+                 "max_image_resolution": "variable",
+                 "image_patch_size": 16
+             }
+
+
+ # Convenience functions for easy usage
+ _scaler_instance = None
+
+ def get_scaler() -> ModelOptimizedScaler:
+     """Get shared scaler instance."""
+     global _scaler_instance
+     if _scaler_instance is None:
+         _scaler_instance = ModelOptimizedScaler()
+     return _scaler_instance
+
+ def scale_image_for_model(image: Union[Image.Image, str, Path],
+                           model_name: str,
+                           scaling_mode: ScalingMode = ScalingMode.FIT) -> Image.Image:
+     """
+     Convenience function to scale an image for a specific model.
+
+     Args:
+         image: PIL Image, or path to image file
+         model_name: Name of the target model
+         scaling_mode: How to scale the image
+
+     Returns:
+         Optimally scaled PIL Image
+     """
+     if isinstance(image, (str, Path)):
+         image = Image.open(image)
+
+     scaler = get_scaler()
+     return scaler.scale_for_model(image, model_name, scaling_mode)
+
+ def get_optimal_size_for_model(model_name: str, original_size: Tuple[int, int]) -> Tuple[int, int]:
+     """
+     Get optimal image size for a specific model.
+
+     Args:
+         model_name: Name of the target model
+         original_size: Original image size (width, height)
+
+     Returns:
+         Optimal target size (width, height)
+     """
+     scaler = get_scaler()
+     return scaler.get_optimal_resolution(model_name, original_size)
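For orientation, a minimal usage sketch of the convenience functions this new module adds (the model names, image path, and import path are illustrative assumptions, not part of the diff):

# Sketch only: assumes the module is importable at this path and that the
# example model names resolve through the fallback capability detection above.
from abstractcore.media.utils.image_scaler import (
    ScalingMode,
    get_optimal_size_for_model,
    scale_image_for_model,
)

# Inspect the resolution a 4032x3024 photo would be scaled to for a
# Qwen2.5-VL style model (capped at 3584x3584, rounded to 14-px patches).
print(get_optimal_size_for_model("qwen2.5-vl-7b", (4032, 3024)))

# Load, scale, and save an image for a Claude-family model using FIT mode.
scaled = scale_image_for_model("photo.jpg", "claude-sonnet", ScalingMode.FIT)
scaled.save("photo_scaled.jpg")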
abstractcore/providers/anthropic_provider.py
@@ -47,8 +47,7 @@ class AnthropicProvider(BaseProvider):
          # Initialize tool handler
          self.tool_handler = UniversalToolHandler(model)

-         # Store configuration (remove duplicate max_tokens)
-         self.temperature = kwargs.get("temperature", 0.7)
+         # Store provider-specific configuration
          self.top_p = kwargs.get("top_p", 1.0)
          self.top_k = kwargs.get("top_k", None)

@@ -132,6 +131,19 @@ class AnthropicProvider(BaseProvider):
          if kwargs.get("top_k") or self.top_k:
              call_params["top_k"] = kwargs.get("top_k", self.top_k)

+         # Handle seed parameter (Anthropic doesn't support seed natively)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             import warnings
+             warnings.warn(
+                 f"Seed parameter ({seed_value}) is not supported by Anthropic Claude API. "
+                 f"For deterministic outputs, use temperature=0.0 which may provide more consistent results, "
+                 f"though true determinism is not guaranteed.",
+                 UserWarning,
+                 stacklevel=3
+             )
+             self.logger.warning(f"Seed {seed_value} requested but not supported by Anthropic API")
+
          # Handle structured output using the "tool trick"
          structured_tool_name = None
          if response_model and PYDANTIC_AVAILABLE:
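Since the Anthropic path now warns rather than silently ignoring a seed, callers can surface that warning explicitly. A small sketch (the `provider.generate(...)` call is a hypothetical stand-in for however the provider is invoked; only the warning behaviour comes from the diff above):

import warnings

# Sketch: capture the UserWarning emitted when a seed reaches AnthropicProvider.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    response = provider.generate("Summarize this.", seed=42, temperature=0.0)  # hypothetical call
    for w in caught:
        if issubclass(w.category, UserWarning):
            print(f"note: {w.message}")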
abstractcore/providers/base.py
@@ -570,8 +570,32 @@ class BaseProvider(AbstractCoreInterface, ABC):
          result_kwargs = kwargs.copy()
          result_kwargs["max_output_tokens"] = effective_max_output

+         # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
+         result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
+         if self.seed is not None:
+             result_kwargs["seed"] = result_kwargs.get("seed", self.seed)
+
          return result_kwargs

+     def _extract_generation_params(self, **kwargs) -> Dict[str, Any]:
+         """
+         Extract generation parameters with consistent fallback hierarchy.
+
+         Returns:
+             Dict containing temperature, seed, and other generation parameters
+         """
+         params = {}
+
+         # Temperature (always present)
+         params["temperature"] = kwargs.get("temperature", self.temperature)
+
+         # Seed (only if not None)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             params["seed"] = seed_value
+
+         return params
+
      def _get_provider_max_tokens_param(self, kwargs: Dict[str, Any]) -> int:
          """
          Extract the appropriate max tokens parameter for this provider.
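The new `_extract_generation_params` helper encodes a kwargs → instance → default fallback. A standalone sketch of that hierarchy (a toy class, not the real BaseProvider):

# Toy illustration of the kwargs -> instance -> default hierarchy used above.
class _ParamsDemo:
    def __init__(self, temperature=0.7, seed=None):
        self.temperature = temperature  # instance-level default
        self.seed = seed

    def _extract_generation_params(self, **kwargs):
        params = {"temperature": kwargs.get("temperature", self.temperature)}
        seed_value = kwargs.get("seed", self.seed)
        if seed_value is not None:
            params["seed"] = seed_value
        return params

demo = _ParamsDemo(temperature=0.2, seed=123)
assert demo._extract_generation_params() == {"temperature": 0.2, "seed": 123}
assert demo._extract_generation_params(temperature=0.9) == {"temperature": 0.9, "seed": 123}
assert _ParamsDemo()._extract_generation_params() == {"temperature": 0.7}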
abstractcore/providers/huggingface_provider.py
@@ -68,6 +68,7 @@ class HuggingFaceProvider(BaseProvider):
          # Initialize tool handler
          self.tool_handler = UniversalToolHandler(model)

+         # Store provider-specific configuration
          self.n_gpu_layers = n_gpu_layers
          self.model_type = None  # Will be "transformers" or "gguf"
          self.device = device
@@ -537,14 +538,15 @@ class HuggingFaceProvider(BaseProvider):
          # Generation parameters using unified system
          generation_kwargs = self._prepare_generation_kwargs(**kwargs)
          max_new_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-         temperature = kwargs.get("temperature", 0.7)
+         temperature = kwargs.get("temperature", self.temperature)
          top_p = kwargs.get("top_p", 0.9)
+         seed_value = kwargs.get("seed", self.seed)

          try:
              if stream:
-                 return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+                 return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
              else:
-                 response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+                 response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed_value)

                  # Handle tool execution for prompted models
                  if tools and self.tool_handler.supports_prompted and response.content:
@@ -651,11 +653,16 @@ class HuggingFaceProvider(BaseProvider):
          generation_kwargs = {
              "messages": chat_messages,
              "max_tokens": max_output_tokens,  # This is max_output_tokens for llama-cpp
-             "temperature": kwargs.get("temperature", 0.7),
+             "temperature": kwargs.get("temperature", self.temperature),
              "top_p": kwargs.get("top_p", 0.9),
              "stream": stream
          }

+         # Add seed if provided (GGUF/llama-cpp supports seed)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             generation_kwargs["seed"] = seed_value
+
          # Handle tools - both native and prompted support
          has_native_tools = False
          if tools:
@@ -846,9 +853,16 @@ class HuggingFaceProvider(BaseProvider):
              )

      def _single_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                       temperature: float, top_p: float) -> GenerateResponse:
+                                       temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
          """Generate single response using transformers (original implementation)"""
          try:
+             # Set seed for deterministic generation if provided
+             if seed is not None:
+                 import torch
+                 torch.manual_seed(seed)
+                 if torch.cuda.is_available():
+                     torch.cuda.manual_seed_all(seed)
+
              outputs = self.pipeline(
                  input_text,
                  max_new_tokens=max_new_tokens,
@@ -902,11 +916,11 @@ class HuggingFaceProvider(BaseProvider):
          }

      def _stream_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                       temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                       temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
          """Stream response using transformers (simulated, original implementation) with tool tag rewriting support"""
          try:
              # HuggingFace doesn't have native streaming, so we simulate it
-             full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+             full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed)

              if full_response.content:
                  # Apply tool tag rewriting if enabled
@@ -1039,12 +1053,12 @@ class HuggingFaceProvider(BaseProvider):
      def _stream_generate_transformers_with_tools(self, input_text: str, max_new_tokens: int,
                                                   temperature: float, top_p: float,
                                                   tools: Optional[List[Dict[str, Any]]] = None,
-                                                  tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                                  tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
          """Stream generate with tool execution at the end"""
          collected_content = ""

          # Stream the response content
-         for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags):
+         for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags, seed):
              collected_content += chunk.content
              yield chunk
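The transformers path seeds torch immediately before each generation. A self-contained sketch of that per-call pattern (the pipeline, model, and prompt are placeholders; this is not the provider's public API):

# Sketch of the per-call seeding pattern used in _single_generate_transformers above.
import torch
from transformers import pipeline

def seeded_generate(text_pipeline, prompt: str, seed: int, **gen_kwargs):
    # Re-seeding right before generation makes repeated sampled calls reproducible.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return text_pipeline(prompt, **gen_kwargs)

generator = pipeline("text-generation", model="gpt2")  # placeholder model
print(seeded_generate(generator, "Hello", seed=42, max_new_tokens=20, do_sample=True))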
abstractcore/providers/lmstudio_provider.py
@@ -196,11 +196,16 @@ class LMStudioProvider(BaseProvider):
              "model": self.model,
              "messages": chat_messages,
              "stream": stream,
-             "temperature": kwargs.get("temperature", 0.7),
+             "temperature": kwargs.get("temperature", self.temperature),
              "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
              "top_p": kwargs.get("top_p", 0.9),
          }

+         # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             payload["seed"] = seed_value
+
          if stream:
              # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
              return self._stream_generate(payload)
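For reference, the request body this builds when a seed is supplied looks roughly like the following (all values are placeholders; LM Studio consumes it through its OpenAI-compatible chat completions endpoint, which is why a top-level "seed" field works):

# Illustrative payload shape after the change; values are placeholders.
payload = {
    "model": "qwen2.5-7b-instruct",
    "messages": [{"role": "user", "content": "Say hello"}],
    "stream": False,
    "temperature": 0.0,
    "max_tokens": 128,
    "top_p": 0.9,
    "seed": 42,
}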
abstractcore/providers/mlx_provider.py
@@ -189,14 +189,15 @@ class MLXProvider(BaseProvider):
          # MLX generation parameters using unified system
          generation_kwargs = self._prepare_generation_kwargs(**kwargs)
          max_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-         temperature = kwargs.get("temperature", 0.7)
+         temperature = kwargs.get("temperature", self.temperature)
          top_p = kwargs.get("top_p", 0.9)
+         seed_value = kwargs.get("seed", self.seed)

          try:
              if stream:
-                 return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+                 return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
              else:
-                 response = self._single_generate(full_prompt, max_tokens, temperature, top_p)
+                 response = self._single_generate(full_prompt, max_tokens, temperature, top_p, seed_value)

                  # Handle tool execution for prompted models
                  if tools and self.tool_handler.supports_prompted and response.content:
@@ -256,9 +257,15 @@ class MLXProvider(BaseProvider):

          return full_prompt

-     def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float) -> GenerateResponse:
+     def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
          """Generate single response"""

+         # Handle seed parameter (MLX supports seed via mx.random.seed)
+         if seed is not None:
+             import mlx.core as mx
+             mx.random.seed(seed)
+             self.logger.debug(f"Set MLX random seed to {seed} for deterministic generation")
+
          # Try different MLX API signatures
          try:
              # Try new mlx-lm API
@@ -305,9 +312,15 @@ class MLXProvider(BaseProvider):
              "total_tokens": total_tokens
          }

-     def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+     def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
          """Generate real streaming response using MLX stream_generate with tool tag rewriting support"""
          try:
+             # Handle seed parameter (MLX supports seed via mx.random.seed)
+             if seed is not None:
+                 import mlx.core as mx
+                 mx.random.seed(seed)
+                 self.logger.debug(f"Set MLX random seed to {seed} for deterministic streaming generation")
+
              # Initialize tool tag rewriter if needed
              rewriter = None
              buffer = ""
@@ -366,12 +379,12 @@ class MLXProvider(BaseProvider):
      def _stream_generate_with_tools(self, full_prompt: str, max_tokens: int,
                                      temperature: float, top_p: float,
                                      tools: Optional[List[Dict[str, Any]]] = None,
-                                     tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                     tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
          """Stream generate with tool execution at the end"""
          collected_content = ""

          # Stream the response content
-         for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags):
+         for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed):
              collected_content += chunk.content
              yield chunk
abstractcore/providers/ollama_provider.py
@@ -132,11 +132,16 @@ class OllamaProvider(BaseProvider):
              "model": self.model,
              "stream": stream,
              "options": {
-                 "temperature": kwargs.get("temperature", 0.7),
+                 "temperature": kwargs.get("temperature", self.temperature),
                  "num_predict": max_output_tokens,  # Ollama uses num_predict for max output tokens
              }
          }

+         # Add seed if provided (Ollama supports seed for deterministic outputs)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             payload["options"]["seed"] = seed_value
+
          # Add structured output support (Ollama native JSON schema)
          if response_model and PYDANTIC_AVAILABLE:
              json_schema = response_model.model_json_schema()
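The Ollama change differs from the OpenAI-style providers in one detail: the seed is nested under "options" rather than sitting at the top level of the request. An illustrative payload shape (values are placeholders):

# Illustrative Ollama payload shape after the change; values are placeholders.
payload = {
    "model": "llama3.1:8b",
    "stream": False,
    "options": {
        "temperature": 0.0,
        "num_predict": 256,
        "seed": 42,
    },
}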
abstractcore/providers/openai_provider.py
@@ -50,8 +50,7 @@ class OpenAIProvider(BaseProvider):
          # Preflight check: validate model exists
          self._validate_model_exists()

-         # Store configuration (remove duplicate max_tokens)
-         self.temperature = kwargs.get("temperature", 0.7)
+         # Store provider-specific configuration
          self.top_p = kwargs.get("top_p", 1.0)
          self.frequency_penalty = kwargs.get("frequency_penalty", 0.0)
          self.presence_penalty = kwargs.get("presence_penalty", 0.0)
@@ -125,6 +124,11 @@ class OpenAIProvider(BaseProvider):
          call_params["top_p"] = kwargs.get("top_p", self.top_p)
          call_params["frequency_penalty"] = kwargs.get("frequency_penalty", self.frequency_penalty)
          call_params["presence_penalty"] = kwargs.get("presence_penalty", self.presence_penalty)
+
+         # Add seed if provided (OpenAI supports seed for deterministic outputs)
+         seed_value = kwargs.get("seed", self.seed)
+         if seed_value is not None:
+             call_params["seed"] = seed_value

          # Handle different token parameter names for different model families
          if self._uses_max_completion_tokens():
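Putting the OpenAI changes together, the parameters sent to the API would look roughly like this when a seed is set (values are placeholders; the output-token key depends on the model-family check referenced above):

# Illustrative call_params after the change; values are placeholders.
call_params = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Say hello"}],
    "temperature": 0.0,
    "top_p": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0,
    "seed": 42,
    # plus "max_completion_tokens" or "max_tokens", per _uses_max_completion_tokens()
}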