morphml-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of morphml might be problematic.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
morphml/meta_learning/strategy_evolution/adaptive_optimizer.py
@@ -0,0 +1,226 @@
"""Adaptive optimizer that switches strategies dynamically.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from typing import Any, Callable, Dict, List, Optional

from morphml.core.dsl import SearchSpace
from morphml.core.graph import ModelGraph
from morphml.logging_config import get_logger

logger = get_logger(__name__)


class AdaptiveOptimizer:
    """
    Adaptive optimizer that switches between strategies.

    Uses multi-armed bandits to learn which optimizer works best
    and dynamically switches during search.

    Args:
        search_space: Search space definition
        evaluator: Architecture evaluation function
        strategy_configs: Dict mapping strategy name to config
        selector_type: Type of strategy selector ('ucb', 'thompson', 'epsilon')
        selector_config: Config for selector

    Example:
        >>> from morphml.optimizers import GeneticAlgorithm, RandomSearch
        >>>
        >>> configs = {
        ...     'ga': {'population_size': 50, 'num_generations': 10},
        ...     'random': {'num_samples': 100}
        ... }
        >>>
        >>> optimizer = AdaptiveOptimizer(
        ...     search_space=space,
        ...     evaluator=evaluator,
        ...     strategy_configs=configs
        ... )
        >>>
        >>> best = optimizer.search(budget=500)
    """

    def __init__(
        self,
        search_space: SearchSpace,
        evaluator: Callable[[ModelGraph], float],
        strategy_configs: Dict[str, Dict[str, Any]],
        selector_type: str = "ucb",
        selector_config: Optional[Dict[str, Any]] = None,
    ):
        """Initialize adaptive optimizer."""
        self.search_space = search_space
        self.evaluator = evaluator
        self.strategy_configs = strategy_configs

        # Create strategy selector
        strategies = list(strategy_configs.keys())
        selector_config = selector_config or {}

        if selector_type == "ucb":
            from morphml.meta_learning.strategy_evolution.bandit import UCBSelector

            self.selector = UCBSelector(strategies, **selector_config)
        elif selector_type == "thompson":
            from morphml.meta_learning.strategy_evolution.bandit import ThompsonSamplingSelector

            self.selector = ThompsonSamplingSelector(strategies, **selector_config)
        elif selector_type == "epsilon":
            from morphml.meta_learning.strategy_evolution.bandit import EpsilonGreedySelector

            self.selector = EpsilonGreedySelector(strategies, **selector_config)
        else:
            raise ValueError(f"Unknown selector type: {selector_type}")

        # Track progress
        self.history = []
        self.best_fitness = -float("inf")
        self.best_architecture = None

        logger.info(
            f"Initialized AdaptiveOptimizer with {len(strategies)} strategies: {strategies}"
        )

    def search(self, budget: int = 500, checkpoint_interval: int = 50) -> ModelGraph:
        """
        Run adaptive search.

        Args:
            budget: Total number of evaluations
            checkpoint_interval: How often to report progress

        Returns:
            Best architecture found
        """
        logger.info(f"Starting adaptive search with budget={budget}")

        evaluations_used = 0

        while evaluations_used < budget:
            # Select strategy
            strategy = self.selector.select_strategy()

            logger.info(f"Evaluations: {evaluations_used}/{budget}, Using strategy: {strategy}")

            # Run strategy for a batch
            batch_size = min(50, budget - evaluations_used)
            batch_results = self._run_strategy_batch(strategy, batch_size)

            # Track results
            self.history.extend(batch_results)
            evaluations_used += len(batch_results)

            # Compute reward (improvement in best fitness)
            prev_best = self.best_fitness
            for arch, fitness in batch_results:
                if fitness > self.best_fitness:
                    self.best_fitness = fitness
                    self.best_architecture = arch

            improvement = self.best_fitness - prev_best

            # Update selector
            self.selector.update(strategy, reward=improvement)

            # Checkpoint
            if evaluations_used % checkpoint_interval == 0:
                stats = self.selector.get_statistics()
                logger.info(
                    f"Progress: {evaluations_used}/{budget}, "
                    f"Best fitness: {self.best_fitness:.4f}"
                )
                logger.info(f"Strategy stats: {stats}")

        # Final summary
        stats = self.selector.get_statistics()
        best_strategy = self.selector.get_best_strategy()

        logger.info("Search complete!")
        logger.info(f"Best strategy: {best_strategy}")
        logger.info(f"Final statistics: {stats}")
        logger.info(f"Best fitness: {self.best_fitness:.4f}")

        return self.best_architecture

    def _run_strategy_batch(self, strategy: str, batch_size: int) -> List[tuple]:
        """
        Run a strategy for a batch of evaluations.

        Args:
            strategy: Strategy name
            batch_size: Number of evaluations

        Returns:
            List of (architecture, fitness) tuples
        """
        results = []

        # Create appropriate optimizer
        config = self.strategy_configs[strategy]

        if strategy == "random":
            # Random sampling
            for _ in range(batch_size):
                arch = self.search_space.sample()
                fitness = self.evaluator(arch)
                results.append((arch, fitness))

        elif strategy in ["ga", "genetic"]:
            # Genetic algorithm
            from morphml.optimizers import GeneticAlgorithm

            # Run for mini-generations
            mini_generations = batch_size // config.get("population_size", 20)
            mini_generations = max(1, mini_generations)

            ga = GeneticAlgorithm(
                search_space=self.search_space,
                evaluator=self.evaluator,
                population_size=config.get("population_size", 20),
                num_generations=mini_generations,
                mutation_prob=config.get("mutation_prob", 0.2),
                crossover_prob=config.get("crossover_prob", 0.8),
            )

            ga.search()

            # Extract results
            results = [(ind.architecture, ind.fitness) for ind in ga.history]

        elif strategy in ["bo", "bayesian"]:
            # Bayesian optimization (simplified)
            # Just use best from random for now
            for _ in range(batch_size):
                arch = self.search_space.sample()
                fitness = self.evaluator(arch)
                results.append((arch, fitness))

        else:
            logger.warning(f"Unknown strategy {strategy}, using random")
            for _ in range(batch_size):
                arch = self.search_space.sample()
                fitness = self.evaluator(arch)
                results.append((arch, fitness))

        return results[:batch_size]  # Ensure exact batch size

    def get_search_trajectory(self) -> List[float]:
        """
        Get fitness trajectory over search.

        Returns:
            List of best fitness values over time
        """
        trajectory = []
        best_so_far = -float("inf")

        for _, fitness in self.history:
            if fitness > best_so_far:
                best_so_far = fitness
            trajectory.append(best_so_far)

        return trajectory
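To make the control flow above concrete, here is a minimal usage sketch. `ToySpace` and `toy_evaluator` are hypothetical stand-ins rather than morphml classes; the sketch assumes only what `_run_strategy_batch` needs for the 'random' and 'bayesian' strategies, namely a `sample()` method on the search space and a callable evaluator, so both configured arms stay on the sampling code path and the bandit bookkeeping is exercised without touching the genetic-algorithm branch.

# Illustrative sketch only: ToySpace and toy_evaluator are hypothetical
# stand-ins, not part of the morphml package.
import random

from morphml.meta_learning.strategy_evolution.adaptive_optimizer import AdaptiveOptimizer


class ToySpace:
    """Stand-in search space exposing the sample() method the optimizer calls."""

    def sample(self):
        # An "architecture" here is just a dict of hyperparameters.
        return {"depth": random.randint(1, 8), "width": random.choice([32, 64, 128])}


def toy_evaluator(arch):
    """Toy fitness in [0, 1]: reward deeper and wider configurations."""
    return 0.5 * arch["depth"] / 8 + 0.5 * arch["width"] / 128


optimizer = AdaptiveOptimizer(
    search_space=ToySpace(),
    evaluator=toy_evaluator,
    # In this release both of these arms reduce to random sampling inside
    # _run_strategy_batch, which keeps the sketch self-contained while still
    # exercising the UCB selector.
    strategy_configs={"random": {}, "bayesian": {}},
    selector_type="ucb",
)

best = optimizer.search(budget=200)
print("best architecture:", best)
print("trajectory points recorded:", len(optimizer.get_search_trajectory()))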
morphml/meta_learning/strategy_evolution/bandit.py
@@ -0,0 +1,276 @@
"""Multi-armed bandit algorithms for strategy selection.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from abc import ABC, abstractmethod
from typing import Dict, List

import numpy as np

from morphml.logging_config import get_logger

logger = get_logger(__name__)


class StrategySelector(ABC):
    """
    Base class for strategy selection algorithms.

    Subclasses implement different bandit algorithms:
    - UCB (Upper Confidence Bound)
    - Thompson Sampling
    - Epsilon-Greedy
    """

    def __init__(self, strategies: List[str]):
        """
        Initialize selector.

        Args:
            strategies: List of strategy names
        """
        self.strategies = strategies
        self.num_strategies = len(strategies)

        # Statistics
        self.counts = np.zeros(self.num_strategies)
        self.rewards = np.zeros(self.num_strategies)

        logger.info(f"Initialized {self.__class__.__name__} with {self.num_strategies} strategies")

    @abstractmethod
    def select_strategy(self) -> str:
        """Select a strategy to use."""
        pass

    def update(self, strategy: str, reward: float) -> None:
        """
        Update statistics after using a strategy.

        Args:
            strategy: Strategy that was used
            reward: Reward obtained (e.g., fitness improvement)
        """
        try:
            idx = self.strategies.index(strategy)
        except ValueError:
            logger.warning(f"Unknown strategy: {strategy}")
            return

        self.counts[idx] += 1
        self.rewards[idx] += reward

        logger.debug(
            f"Updated {strategy}: pulls={self.counts[idx]:.0f}, "
            f"avg_reward={self.rewards[idx]/self.counts[idx]:.4f}"
        )

    def get_statistics(self) -> Dict[str, Dict[str, float]]:
        """
        Get statistics for all strategies.

        Returns:
            Dict mapping strategy name to statistics
        """
        stats = {}

        for i, strategy in enumerate(self.strategies):
            if self.counts[i] > 0:
                stats[strategy] = {
                    "pulls": int(self.counts[i]),
                    "total_reward": float(self.rewards[i]),
                    "avg_reward": float(self.rewards[i] / self.counts[i]),
                }
            else:
                stats[strategy] = {
                    "pulls": 0,
                    "total_reward": 0.0,
                    "avg_reward": 0.0,
                }

        return stats

    def get_best_strategy(self) -> str:
        """Get strategy with highest average reward."""
        mean_rewards = self.rewards / (self.counts + 1e-8)
        best_idx = np.argmax(mean_rewards)
        return self.strategies[best_idx]


class UCBSelector(StrategySelector):
    """
    Upper Confidence Bound (UCB) strategy selector.

    UCB balances exploration and exploitation using:
        UCB(i) = mean_reward(i) + c * sqrt(log(total) / pulls(i))

    Args:
        strategies: List of strategy names
        exploration_factor: Exploration constant (default: 2.0)

    Example:
        >>> selector = UCBSelector(['GA', 'BO', 'DE'])
        >>> strategy = selector.select_strategy()
        >>> # ... run strategy ...
        >>> selector.update(strategy, reward=0.15)
    """

    def __init__(self, strategies: List[str], exploration_factor: float = 2.0):
        """Initialize UCB selector."""
        super().__init__(strategies)
        self.exploration_factor = exploration_factor

    def select_strategy(self) -> str:
        """
        Select strategy using UCB algorithm.

        Returns:
            Selected strategy name
        """
        total_pulls = self.counts.sum()

        # Pull each arm at least once
        if total_pulls < self.num_strategies:
            idx = int(total_pulls)
            return self.strategies[idx]

        # Compute UCB scores
        mean_rewards = self.rewards / (self.counts + 1e-8)
        exploration_bonus = self.exploration_factor * np.sqrt(
            np.log(total_pulls) / (self.counts + 1e-8)
        )

        ucb_scores = mean_rewards + exploration_bonus

        # Select strategy with highest UCB
        best_idx = np.argmax(ucb_scores)

        logger.debug(
            f"UCB scores: {dict(zip(self.strategies, ucb_scores))}, "
            f"selected: {self.strategies[best_idx]}"
        )

        return self.strategies[best_idx]


class ThompsonSamplingSelector(StrategySelector):
    """
    Thompson Sampling strategy selector.

    Uses Beta distribution for each strategy:
        Beta(alpha, beta) where alpha = successes, beta = failures

    Args:
        strategies: List of strategy names
        prior_alpha: Prior alpha (pseudo-successes)
        prior_beta: Prior beta (pseudo-failures)

    Example:
        >>> selector = ThompsonSamplingSelector(['GA', 'BO'])
        >>> strategy = selector.select_strategy()
    """

    def __init__(self, strategies: List[str], prior_alpha: float = 1.0, prior_beta: float = 1.0):
        """Initialize Thompson Sampling selector."""
        super().__init__(strategies)
        self.prior_alpha = prior_alpha
        self.prior_beta = prior_beta

        # Track successes and failures
        self.successes = np.ones(self.num_strategies) * prior_alpha
        self.failures = np.ones(self.num_strategies) * prior_beta

    def select_strategy(self) -> str:
        """
        Select strategy using Thompson Sampling.

        Returns:
            Selected strategy name
        """
        # Sample from Beta distribution for each strategy
        samples = np.random.beta(self.successes, self.failures)

        # Select strategy with highest sample
        best_idx = np.argmax(samples)

        logger.debug(
            f"Thompson samples: {dict(zip(self.strategies, samples))}, "
            f"selected: {self.strategies[best_idx]}"
        )

        return self.strategies[best_idx]

    def update(self, strategy: str, reward: float) -> None:
        """
        Update with reward (interpreted as success probability).

        Args:
            strategy: Strategy used
            reward: Reward in [0, 1] (treated as success probability)
        """
        super().update(strategy, reward)

        try:
            idx = self.strategies.index(strategy)
        except ValueError:
            return

        # Interpret reward as success probability
        # Use reward to update Beta distribution
        self.successes[idx] += reward
        self.failures[idx] += 1.0 - reward


class EpsilonGreedySelector(StrategySelector):
    """
    Epsilon-Greedy strategy selector.

    With probability epsilon, explore (random choice).
    With probability 1-epsilon, exploit (best strategy).

    Args:
        strategies: List of strategy names
        epsilon: Exploration probability (default: 0.1)
        epsilon_decay: Decay rate for epsilon (default: 0.99)
    """

    def __init__(self, strategies: List[str], epsilon: float = 0.1, epsilon_decay: float = 0.99):
        """Initialize Epsilon-Greedy selector."""
        super().__init__(strategies)
        self.epsilon = epsilon
        self.initial_epsilon = epsilon
        self.epsilon_decay = epsilon_decay

    def select_strategy(self) -> str:
        """
        Select strategy using epsilon-greedy.

        Returns:
            Selected strategy name
        """
        # Explore
        if np.random.rand() < self.epsilon:
            idx = np.random.randint(self.num_strategies)
            logger.debug(f"Exploring: selected {self.strategies[idx]}")
            return self.strategies[idx]

        # Exploit
        mean_rewards = self.rewards / (self.counts + 1e-8)
        best_idx = np.argmax(mean_rewards)

        logger.debug(f"Exploiting: selected {self.strategies[best_idx]}")

        return self.strategies[best_idx]

    def update(self, strategy: str, reward: float) -> None:
        """Update and decay epsilon."""
        super().update(strategy, reward)

        # Decay epsilon
        self.epsilon *= self.epsilon_decay

    def reset_epsilon(self) -> None:
        """Reset epsilon to initial value."""
        self.epsilon = self.initial_epsilon
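The selectors track only per-arm pull counts and cumulative rewards, so they can be exercised outside MorphML on a synthetic bandit. As a worked instance of the UCB rule above: with c = 2 and 100 total pulls, an arm averaging 0.5 reward over 10 pulls scores about 0.5 + 2*sqrt(ln(100)/10) ≈ 1.86, while an arm averaging 0.6 over 80 pulls scores about 0.6 + 2*sqrt(ln(100)/80) ≈ 1.08, so the under-explored arm is pulled next. The sketch below is illustrative only; the three arm probabilities are arbitrary test values, not anything shipped in the package.

# Illustrative sketch: drive the three selectors on a synthetic 3-armed
# Bernoulli bandit and report which arm each one converges to.
import numpy as np

from morphml.meta_learning.strategy_evolution.bandit import (
    EpsilonGreedySelector,
    ThompsonSamplingSelector,
    UCBSelector,
)

ARMS = {"ga": 0.7, "bo": 0.5, "random": 0.2}  # true success probabilities (made up)
rng = np.random.default_rng(0)

for selector in (
    UCBSelector(list(ARMS)),
    ThompsonSamplingSelector(list(ARMS)),
    EpsilonGreedySelector(list(ARMS), epsilon=0.2),
):
    for _ in range(500):
        arm = selector.select_strategy()
        reward = float(rng.random() < ARMS[arm])  # Bernoulli reward in {0, 1}
        selector.update(arm, reward)
    print(selector.__class__.__name__, "->", selector.get_best_strategy())
    print("  ", selector.get_statistics())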