openevolve 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {openevolve-0.2.2 → openevolve-0.2.4}/PKG-INFO +1 -1
  2. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/_version.py +1 -1
  3. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/config.py +24 -19
  4. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/llm/openai.py +32 -2
  5. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/process_parallel.py +19 -5
  6. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/PKG-INFO +1 -1
  7. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/SOURCES.txt +2 -0
  8. openevolve-0.2.4/tests/test_model_parameter_demo.py +70 -0
  9. openevolve-0.2.4/tests/test_openai_model_detection.py +101 -0
  10. {openevolve-0.2.2 → openevolve-0.2.4}/LICENSE +0 -0
  11. {openevolve-0.2.2 → openevolve-0.2.4}/README.md +0 -0
  12. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/__init__.py +0 -0
  13. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/cli.py +0 -0
  14. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/controller.py +0 -0
  15. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/database.py +0 -0
  16. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/evaluation_result.py +0 -0
  17. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/evaluator.py +0 -0
  18. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/iteration.py +0 -0
  19. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/llm/__init__.py +0 -0
  20. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/llm/base.py +0 -0
  21. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/llm/ensemble.py +0 -0
  22. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/prompt/__init__.py +0 -0
  23. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/prompt/sampler.py +0 -0
  24. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/prompt/templates.py +0 -0
  25. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/utils/__init__.py +0 -0
  26. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/utils/async_utils.py +0 -0
  27. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/utils/code_utils.py +0 -0
  28. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/utils/format_utils.py +0 -0
  29. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve/utils/metrics_utils.py +0 -0
  30. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/dependency_links.txt +0 -0
  31. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/entry_points.txt +0 -0
  32. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/requires.txt +0 -0
  33. {openevolve-0.2.2 → openevolve-0.2.4}/openevolve.egg-info/top_level.txt +0 -0
  34. {openevolve-0.2.2 → openevolve-0.2.4}/pyproject.toml +0 -0
  35. {openevolve-0.2.2 → openevolve-0.2.4}/setup.cfg +0 -0
  36. {openevolve-0.2.2 → openevolve-0.2.4}/setup.py +0 -0
  37. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_artifacts.py +0 -0
  38. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_artifacts_integration.py +0 -0
  39. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_cascade_validation.py +0 -0
  40. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_checkpoint_resume.py +0 -0
  41. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_code_utils.py +0 -0
  42. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_database.py +0 -0
  43. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_database_cleanup.py +0 -0
  44. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_evaluator_timeout.py +0 -0
  45. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_feature_stats_persistence.py +0 -0
  46. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_grid_stability.py +0 -0
  47. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_island_isolation.py +0 -0
  48. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_island_migration.py +0 -0
  49. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_island_parent_consistency.py +0 -0
  50. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_island_tracking.py +0 -0
  51. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_iteration_counting.py +0 -0
  52. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_llm_ensemble.py +0 -0
  53. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_map_elites_features.py +0 -0
  54. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_process_parallel.py +0 -0
  55. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_prompt_sampler.py +0 -0
  56. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_prompt_sampler_comprehensive.py +0 -0
  57. {openevolve-0.2.2 → openevolve-0.2.4}/tests/test_valid_configs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openevolve
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Open-source implementation of AlphaEvolve
5
5
  Author: codelion
6
6
  License: Apache-2.0
@@ -1,3 +1,3 @@
1
1
  """Version information for openevolve package."""
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.4"
@@ -56,12 +56,7 @@ class LLMConfig(LLMModelConfig):
56
56
  retry_delay: int = 5
57
57
 
58
58
  # n-model configuration for evolution LLM ensemble
59
- models: List[LLMModelConfig] = field(
60
- default_factory=lambda: [
61
- LLMModelConfig(name="gpt-4o-mini", weight=0.8),
62
- LLMModelConfig(name="gpt-4o", weight=0.2),
63
- ]
64
- )
59
+ models: List[LLMModelConfig] = field(default_factory=list)
65
60
 
66
61
  # n-model configuration for evaluator LLM ensemble
67
62
  evaluator_models: List[LLMModelConfig] = field(default_factory=lambda: [])
@@ -75,24 +70,34 @@ class LLMConfig(LLMModelConfig):
75
70
  def __post_init__(self):
76
71
  """Post-initialization to set up model configurations"""
77
72
  # Handle backward compatibility for primary_model(_weight) and secondary_model(_weight).
78
- if (self.primary_model or self.primary_model_weight) and len(self.models) < 1:
79
- # Ensure we have a primary model
80
- self.models.append(LLMModelConfig())
81
73
  if self.primary_model:
82
- self.models[0].name = self.primary_model
83
- if self.primary_model_weight:
84
- self.models[0].weight = self.primary_model_weight
74
+ # Create primary model
75
+ primary_model = LLMModelConfig(
76
+ name=self.primary_model,
77
+ weight=self.primary_model_weight or 1.0
78
+ )
79
+ self.models.append(primary_model)
85
80
 
86
- if (self.secondary_model or self.secondary_model_weight) and len(self.models) < 2:
87
- # Ensure we have a second model
88
- self.models.append(LLMModelConfig())
89
81
  if self.secondary_model:
90
- self.models[1].name = self.secondary_model
91
- if self.secondary_model_weight:
92
- self.models[1].weight = self.secondary_model_weight
82
+ # Create secondary model (only if weight > 0)
83
+ if self.secondary_model_weight is None or self.secondary_model_weight > 0:
84
+ secondary_model = LLMModelConfig(
85
+ name=self.secondary_model,
86
+ weight=self.secondary_model_weight if self.secondary_model_weight is not None else 0.2
87
+ )
88
+ self.models.append(secondary_model)
89
+
90
+ # Only validate if this looks like a user config (has some model info)
91
+ # Don't validate during internal/default initialization
92
+ if (self.primary_model or self.secondary_model or
93
+ self.primary_model_weight or self.secondary_model_weight) and not self.models:
94
+ raise ValueError(
95
+ "No LLM models configured. Please specify 'models' array or "
96
+ "'primary_model' in your configuration."
97
+ )
93
98
 
94
99
  # If no evaluator models are defined, use the same models as for evolution
95
- if not self.evaluator_models or len(self.evaluator_models) < 1:
100
+ if not self.evaluator_models:
96
101
  self.evaluator_models = self.models.copy()
97
102
 
98
103
  # Update models with shared configuration values
@@ -66,14 +66,40 @@ class OpenAILLM(LLMInterface):
66
66
  formatted_messages.extend(messages)
67
67
 
68
68
  # Set up generation parameters
69
- if self.api_base == "https://api.openai.com/v1" and str(self.model).lower().startswith("o"):
70
- # For o-series models
69
+ # Define OpenAI reasoning models that require max_completion_tokens
70
+ # These models don't support temperature/top_p and use different parameters
71
+ OPENAI_REASONING_MODEL_PREFIXES = (
72
+ # O-series reasoning models
73
+ "o1-", "o1", # o1, o1-mini, o1-preview
74
+ "o3-", "o3", # o3, o3-mini, o3-pro
75
+ "o4-", # o4-mini
76
+ # GPT-5 series are also reasoning models
77
+ "gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
78
+ # The GPT OSS series are also reasoning models
79
+ "gpt-oss-120b", "gpt-oss-20b"
80
+ )
81
+
82
+ # Check if this is an OpenAI reasoning model
83
+ model_lower = str(self.model).lower()
84
+ is_openai_reasoning_model = (
85
+ self.api_base == "https://api.openai.com/v1" and
86
+ model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
87
+ )
88
+
89
+ if is_openai_reasoning_model:
90
+ # For OpenAI reasoning models
71
91
  params = {
72
92
  "model": self.model,
73
93
  "messages": formatted_messages,
74
94
  "max_completion_tokens": kwargs.get("max_tokens", self.max_tokens),
75
95
  }
96
+ # Add optional reasoning parameters if provided
97
+ if "reasoning_effort" in kwargs:
98
+ params["reasoning_effort"] = kwargs["reasoning_effort"]
99
+ if "verbosity" in kwargs:
100
+ params["verbosity"] = kwargs["verbosity"]
76
101
  else:
102
+ # Standard parameters for all other models
77
103
  params = {
78
104
  "model": self.model,
79
105
  "messages": formatted_messages,
@@ -82,6 +108,10 @@ class OpenAILLM(LLMInterface):
82
108
  "max_tokens": kwargs.get("max_tokens", self.max_tokens),
83
109
  }
84
110
 
111
+ # Handle reasoning_effort for open source reasoning models.
112
+ if "reasoning_effort" in kwargs:
113
+ params["reasoning_effort"] = kwargs["reasoning_effort"]
114
+
85
115
  # Add seed parameter for reproducibility if configured
86
116
  # Skip seed parameter for Google AI Studio endpoint as it doesn't support it
87
117
  seed = kwargs.get("seed", self.random_seed)
@@ -178,12 +178,26 @@ def _run_iteration_worker(
178
178
  iteration_start = time.time()
179
179
 
180
180
  # Generate code modification (sync wrapper for async)
181
- llm_response = asyncio.run(
182
- _worker_llm_ensemble.generate_with_context(
183
- system_message=prompt["system"],
184
- messages=[{"role": "user", "content": prompt["user"]}],
181
+ try:
182
+ llm_response = asyncio.run(
183
+ _worker_llm_ensemble.generate_with_context(
184
+ system_message=prompt["system"],
185
+ messages=[{"role": "user", "content": prompt["user"]}],
186
+ )
187
+ )
188
+ except Exception as e:
189
+ logger.error(f"LLM generation failed: {e}")
190
+ return SerializableResult(
191
+ error=f"LLM generation failed: {str(e)}",
192
+ iteration=iteration
193
+ )
194
+
195
+ # Check for None response
196
+ if llm_response is None:
197
+ return SerializableResult(
198
+ error="LLM returned None response",
199
+ iteration=iteration
185
200
  )
186
- )
187
201
 
188
202
  # Parse response based on evolution mode
189
203
  if _worker_config.diff_based_evolution:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openevolve
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Open-source implementation of AlphaEvolve
5
5
  Author: codelion
6
6
  License: Apache-2.0
@@ -47,6 +47,8 @@ tests/test_island_tracking.py
47
47
  tests/test_iteration_counting.py
48
48
  tests/test_llm_ensemble.py
49
49
  tests/test_map_elites_features.py
50
+ tests/test_model_parameter_demo.py
51
+ tests/test_openai_model_detection.py
50
52
  tests/test_process_parallel.py
51
53
  tests/test_prompt_sampler.py
52
54
  tests/test_prompt_sampler_comprehensive.py
@@ -0,0 +1,70 @@
1
+ """
2
+ Demonstration of fixed OpenAI model parameter handling
3
+ """
4
+
5
+ def demo_model_parameter_selection():
6
+ """Demonstrate how different models get different parameters"""
7
+
8
+ # Mock the logic from openai.py
9
+ OPENAI_REASONING_MODEL_PREFIXES = (
10
+ # O-series reasoning models
11
+ "o1-", "o1", # o1, o1-mini, o1-preview
12
+ "o3-", "o3", # o3, o3-mini, o3-pro
13
+ "o4-", # o4-mini
14
+ # GPT-5 series are also reasoning models
15
+ "gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
16
+ )
17
+
18
+ def get_params_for_model(model_name, api_base="https://api.openai.com/v1"):
19
+ """Show what parameters would be used for each model"""
20
+ model_lower = str(model_name).lower()
21
+ is_openai_reasoning_model = (
22
+ api_base == "https://api.openai.com/v1" and
23
+ model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
24
+ )
25
+
26
+ if is_openai_reasoning_model:
27
+ return {
28
+ "type": "reasoning_model",
29
+ "uses": "max_completion_tokens",
30
+ "supports": ["reasoning_effort", "verbosity"],
31
+ "excludes": ["temperature", "top_p"]
32
+ }
33
+ else:
34
+ return {
35
+ "type": "standard_model",
36
+ "uses": "max_tokens",
37
+ "supports": ["temperature", "top_p"],
38
+ "excludes": []
39
+ }
40
+
41
+ print("🔧 OpenAI Model Parameter Selection Demo")
42
+ print("=" * 50)
43
+
44
+ test_models = [
45
+ # Reasoning models
46
+ ("o1-mini", "✅ Reasoning"),
47
+ ("o1-preview", "✅ Reasoning"),
48
+ ("o3-mini-2025-01-31", "✅ Reasoning (with date)"),
49
+ ("gpt-5-nano", "✅ Reasoning (GPT-5 series)"),
50
+
51
+ # Standard models
52
+ ("gpt-4o-mini", "❌ Standard (not reasoning)"),
53
+ ("gpt-4o", "❌ Standard"),
54
+ ("gpt-4-turbo", "❌ Standard"),
55
+ ]
56
+
57
+ for model, description in test_models:
58
+ params = get_params_for_model(model)
59
+ print(f"\n📋 Model: {model}")
60
+ print(f" Type: {description}")
61
+ print(f" Uses: {params['uses']}")
62
+ print(f" Supports: {', '.join(params['supports'])}")
63
+ if params['excludes']:
64
+ print(f" Excludes: {', '.join(params['excludes'])}")
65
+
66
+ print("\n" + "=" * 50)
67
+ print("✅ Fix successful! No more false positives/negatives.")
68
+
69
+ if __name__ == "__main__":
70
+ demo_model_parameter_selection()
@@ -0,0 +1,101 @@
1
+ """
2
+ Test OpenAI reasoning model detection logic
3
+ """
4
+
5
+ import unittest
6
+ from unittest.mock import MagicMock
7
+
8
+
9
+ class TestOpenAIReasoningModelDetection(unittest.TestCase):
10
+ """Test that OpenAI reasoning models are correctly identified"""
11
+
12
+ def test_reasoning_model_detection(self):
13
+ """Test various model names to ensure correct reasoning model detection"""
14
+
15
+ # Define the same constants as in the code
16
+ OPENAI_REASONING_MODEL_PREFIXES = (
17
+ # O-series reasoning models
18
+ "o1-", "o1", # o1, o1-mini, o1-preview
19
+ "o3-", "o3", # o3, o3-mini, o3-pro
20
+ "o4-", # o4-mini
21
+ # GPT-5 series are also reasoning models
22
+ "gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
23
+ )
24
+
25
+ def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"):
26
+ """Test function that mimics the logic in openai.py"""
27
+ model_lower = str(model_name).lower()
28
+ return (
29
+ api_base == "https://api.openai.com/v1" and
30
+ model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
31
+ )
32
+
33
+ # Test cases: (model_name, expected_result, description)
34
+ test_cases = [
35
+ # Reasoning models - should return True
36
+ ("o1", True, "Base o1 model"),
37
+ ("o1-mini", True, "o1-mini model"),
38
+ ("o1-preview", True, "o1-preview model"),
39
+ ("o1-mini-2025-01-31", True, "o1-mini with date"),
40
+ ("o3", True, "Base o3 model"),
41
+ ("o3-mini", True, "o3-mini model"),
42
+ ("o3-pro", True, "o3-pro model"),
43
+ ("o4-mini", True, "o4-mini model"),
44
+ ("gpt-5", True, "Base gpt-5 model"),
45
+ ("gpt-5-mini", True, "gpt-5-mini model"),
46
+ ("gpt-5-nano", True, "gpt-5-nano model"),
47
+
48
+ # Non-reasoning models - should return False
49
+ ("gpt-4o-mini", False, "gpt-4o-mini (not reasoning)"),
50
+ ("gpt-4o", False, "gpt-4o (not reasoning)"),
51
+ ("gpt-4", False, "gpt-4 (not reasoning)"),
52
+ ("gpt-3.5-turbo", False, "gpt-3.5-turbo (not reasoning)"),
53
+ ("claude-3", False, "Non-OpenAI model"),
54
+ ("gemini-pro", False, "Non-OpenAI model"),
55
+
56
+ # Edge cases
57
+ ("O1-MINI", True, "Uppercase o1-mini"),
58
+ ("GPT-5-MINI", True, "Uppercase gpt-5-mini"),
59
+ ]
60
+
61
+ for model_name, expected, description in test_cases:
62
+ with self.subTest(model=model_name, desc=description):
63
+ result = is_reasoning_model(model_name)
64
+ self.assertEqual(
65
+ result,
66
+ expected,
67
+ f"Model '{model_name}' ({description}): expected {expected}, got {result}"
68
+ )
69
+
70
+ def test_non_openai_api_base(self):
71
+ """Test that non-OpenAI API bases don't trigger reasoning model logic"""
72
+ OPENAI_REASONING_MODEL_PREFIXES = (
73
+ "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5"
74
+ )
75
+
76
+ def is_reasoning_model(model_name, api_base):
77
+ model_lower = str(model_name).lower()
78
+ return (
79
+ api_base == "https://api.openai.com/v1" and
80
+ model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
81
+ )
82
+
83
+ # Even reasoning model names should return False for non-OpenAI APIs
84
+ test_cases = [
85
+ ("o1-mini", "https://api.anthropic.com/v1", False),
86
+ ("gpt-5", "https://generativelanguage.googleapis.com/v1beta/openai/", False),
87
+ ("o3-mini", "https://api.deepseek.com/v1", False),
88
+ ]
89
+
90
+ for model_name, api_base, expected in test_cases:
91
+ with self.subTest(model=model_name, api=api_base):
92
+ result = is_reasoning_model(model_name, api_base)
93
+ self.assertEqual(
94
+ result,
95
+ expected,
96
+ f"Model '{model_name}' with API '{api_base}' should return {expected}"
97
+ )
98
+
99
+
100
+ if __name__ == "__main__":
101
+ unittest.main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes