morphml-1.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.

Potentially problematic release.


This version of morphml might be problematic.

Files changed (158)
  1. morphml/__init__.py +14 -0
  2. morphml/api/__init__.py +26 -0
  3. morphml/api/app.py +326 -0
  4. morphml/api/auth.py +193 -0
  5. morphml/api/client.py +338 -0
  6. morphml/api/models.py +132 -0
  7. morphml/api/rate_limit.py +192 -0
  8. morphml/benchmarking/__init__.py +36 -0
  9. morphml/benchmarking/comparison.py +430 -0
  10. morphml/benchmarks/__init__.py +56 -0
  11. morphml/benchmarks/comparator.py +409 -0
  12. morphml/benchmarks/datasets.py +280 -0
  13. morphml/benchmarks/metrics.py +199 -0
  14. morphml/benchmarks/openml_suite.py +201 -0
  15. morphml/benchmarks/problems.py +289 -0
  16. morphml/benchmarks/suite.py +318 -0
  17. morphml/cli/__init__.py +5 -0
  18. morphml/cli/commands/experiment.py +329 -0
  19. morphml/cli/main.py +457 -0
  20. morphml/cli/quickstart.py +312 -0
  21. morphml/config.py +278 -0
  22. morphml/constraints/__init__.py +19 -0
  23. morphml/constraints/handler.py +205 -0
  24. morphml/constraints/predicates.py +285 -0
  25. morphml/core/__init__.py +3 -0
  26. morphml/core/crossover.py +449 -0
  27. morphml/core/dsl/README.md +359 -0
  28. morphml/core/dsl/__init__.py +72 -0
  29. morphml/core/dsl/ast_nodes.py +364 -0
  30. morphml/core/dsl/compiler.py +318 -0
  31. morphml/core/dsl/layers.py +368 -0
  32. morphml/core/dsl/lexer.py +336 -0
  33. morphml/core/dsl/parser.py +455 -0
  34. morphml/core/dsl/search_space.py +386 -0
  35. morphml/core/dsl/syntax.py +199 -0
  36. morphml/core/dsl/type_system.py +361 -0
  37. morphml/core/dsl/validator.py +386 -0
  38. morphml/core/graph/__init__.py +40 -0
  39. morphml/core/graph/edge.py +124 -0
  40. morphml/core/graph/graph.py +507 -0
  41. morphml/core/graph/mutations.py +409 -0
  42. morphml/core/graph/node.py +196 -0
  43. morphml/core/graph/serialization.py +361 -0
  44. morphml/core/graph/visualization.py +431 -0
  45. morphml/core/objectives/__init__.py +20 -0
  46. morphml/core/search/__init__.py +33 -0
  47. morphml/core/search/individual.py +252 -0
  48. morphml/core/search/parameters.py +453 -0
  49. morphml/core/search/population.py +375 -0
  50. morphml/core/search/search_engine.py +340 -0
  51. morphml/distributed/__init__.py +76 -0
  52. morphml/distributed/fault_tolerance.py +497 -0
  53. morphml/distributed/health_monitor.py +348 -0
  54. morphml/distributed/master.py +709 -0
  55. morphml/distributed/proto/README.md +224 -0
  56. morphml/distributed/proto/__init__.py +74 -0
  57. morphml/distributed/proto/worker.proto +170 -0
  58. morphml/distributed/proto/worker_pb2.py +79 -0
  59. morphml/distributed/proto/worker_pb2_grpc.py +423 -0
  60. morphml/distributed/resource_manager.py +416 -0
  61. morphml/distributed/scheduler.py +567 -0
  62. morphml/distributed/storage/__init__.py +33 -0
  63. morphml/distributed/storage/artifacts.py +381 -0
  64. morphml/distributed/storage/cache.py +366 -0
  65. morphml/distributed/storage/checkpointing.py +329 -0
  66. morphml/distributed/storage/database.py +459 -0
  67. morphml/distributed/worker.py +549 -0
  68. morphml/evaluation/__init__.py +5 -0
  69. morphml/evaluation/heuristic.py +237 -0
  70. morphml/exceptions.py +55 -0
  71. morphml/execution/__init__.py +5 -0
  72. morphml/execution/local_executor.py +350 -0
  73. morphml/integrations/__init__.py +28 -0
  74. morphml/integrations/jax_adapter.py +206 -0
  75. morphml/integrations/pytorch_adapter.py +530 -0
  76. morphml/integrations/sklearn_adapter.py +206 -0
  77. morphml/integrations/tensorflow_adapter.py +230 -0
  78. morphml/logging_config.py +93 -0
  79. morphml/meta_learning/__init__.py +66 -0
  80. morphml/meta_learning/architecture_similarity.py +277 -0
  81. morphml/meta_learning/experiment_database.py +240 -0
  82. morphml/meta_learning/knowledge_base/__init__.py +19 -0
  83. morphml/meta_learning/knowledge_base/embedder.py +179 -0
  84. morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
  85. morphml/meta_learning/knowledge_base/meta_features.py +265 -0
  86. morphml/meta_learning/knowledge_base/vector_store.py +271 -0
  87. morphml/meta_learning/predictors/__init__.py +27 -0
  88. morphml/meta_learning/predictors/ensemble.py +221 -0
  89. morphml/meta_learning/predictors/gnn_predictor.py +552 -0
  90. morphml/meta_learning/predictors/learning_curve.py +231 -0
  91. morphml/meta_learning/predictors/proxy_metrics.py +261 -0
  92. morphml/meta_learning/strategy_evolution/__init__.py +27 -0
  93. morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
  94. morphml/meta_learning/strategy_evolution/bandit.py +276 -0
  95. morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
  96. morphml/meta_learning/transfer.py +581 -0
  97. morphml/meta_learning/warm_start.py +286 -0
  98. morphml/optimizers/__init__.py +74 -0
  99. morphml/optimizers/adaptive_operators.py +399 -0
  100. morphml/optimizers/bayesian/__init__.py +52 -0
  101. morphml/optimizers/bayesian/acquisition.py +387 -0
  102. morphml/optimizers/bayesian/base.py +319 -0
  103. morphml/optimizers/bayesian/gaussian_process.py +635 -0
  104. morphml/optimizers/bayesian/smac.py +534 -0
  105. morphml/optimizers/bayesian/tpe.py +411 -0
  106. morphml/optimizers/differential_evolution.py +220 -0
  107. morphml/optimizers/evolutionary/__init__.py +61 -0
  108. morphml/optimizers/evolutionary/cma_es.py +416 -0
  109. morphml/optimizers/evolutionary/differential_evolution.py +556 -0
  110. morphml/optimizers/evolutionary/encoding.py +426 -0
  111. morphml/optimizers/evolutionary/particle_swarm.py +449 -0
  112. morphml/optimizers/genetic_algorithm.py +486 -0
  113. morphml/optimizers/gradient_based/__init__.py +22 -0
  114. morphml/optimizers/gradient_based/darts.py +550 -0
  115. morphml/optimizers/gradient_based/enas.py +585 -0
  116. morphml/optimizers/gradient_based/operations.py +474 -0
  117. morphml/optimizers/gradient_based/utils.py +601 -0
  118. morphml/optimizers/hill_climbing.py +169 -0
  119. morphml/optimizers/multi_objective/__init__.py +56 -0
  120. morphml/optimizers/multi_objective/indicators.py +504 -0
  121. morphml/optimizers/multi_objective/nsga2.py +647 -0
  122. morphml/optimizers/multi_objective/visualization.py +427 -0
  123. morphml/optimizers/nsga2.py +308 -0
  124. morphml/optimizers/random_search.py +172 -0
  125. morphml/optimizers/simulated_annealing.py +181 -0
  126. morphml/plugins/__init__.py +35 -0
  127. morphml/plugins/custom_evaluator_example.py +81 -0
  128. morphml/plugins/custom_optimizer_example.py +63 -0
  129. morphml/plugins/plugin_system.py +454 -0
  130. morphml/reports/__init__.py +30 -0
  131. morphml/reports/generator.py +362 -0
  132. morphml/tracking/__init__.py +7 -0
  133. morphml/tracking/experiment.py +309 -0
  134. morphml/tracking/logger.py +301 -0
  135. morphml/tracking/reporter.py +357 -0
  136. morphml/utils/__init__.py +6 -0
  137. morphml/utils/checkpoint.py +189 -0
  138. morphml/utils/comparison.py +390 -0
  139. morphml/utils/export.py +407 -0
  140. morphml/utils/progress.py +392 -0
  141. morphml/utils/validation.py +392 -0
  142. morphml/version.py +7 -0
  143. morphml/visualization/__init__.py +50 -0
  144. morphml/visualization/analytics.py +423 -0
  145. morphml/visualization/architecture_diagrams.py +353 -0
  146. morphml/visualization/architecture_plot.py +223 -0
  147. morphml/visualization/convergence_plot.py +174 -0
  148. morphml/visualization/crossover_viz.py +386 -0
  149. morphml/visualization/graph_viz.py +338 -0
  150. morphml/visualization/pareto_plot.py +149 -0
  151. morphml/visualization/plotly_dashboards.py +422 -0
  152. morphml/visualization/population.py +309 -0
  153. morphml/visualization/progress.py +260 -0
  154. morphml-1.0.0.dist-info/METADATA +434 -0
  155. morphml-1.0.0.dist-info/RECORD +158 -0
  156. morphml-1.0.0.dist-info/WHEEL +4 -0
  157. morphml-1.0.0.dist-info/entry_points.txt +3 -0
  158. morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
--- /dev/null
+++ morphml/meta_learning/strategy_evolution/adaptive_optimizer.py
@@ -0,0 +1,226 @@
+"""Adaptive optimizer that switches strategies dynamically.
+
+Author: Eshan Roy <eshanized@proton.me>
+Organization: TONMOY INFRASTRUCTURE & VISION
+"""
+
+from typing import Any, Callable, Dict, List, Optional
+
+from morphml.core.dsl import SearchSpace
+from morphml.core.graph import ModelGraph
+from morphml.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class AdaptiveOptimizer:
+    """
+    Adaptive optimizer that switches between strategies.
+
+    Uses multi-armed bandits to learn which optimizer works best
+    and dynamically switches during search.
+
+    Args:
+        search_space: Search space definition
+        evaluator: Architecture evaluation function
+        strategy_configs: Dict mapping strategy name to config
+        selector_type: Type of strategy selector ('ucb', 'thompson', 'epsilon')
+        selector_config: Config for selector
+
+    Example:
+        >>> from morphml.optimizers import GeneticAlgorithm, RandomSearch
+        >>>
+        >>> configs = {
+        ...     'ga': {'population_size': 50, 'num_generations': 10},
+        ...     'random': {'num_samples': 100}
+        ... }
+        >>>
+        >>> optimizer = AdaptiveOptimizer(
+        ...     search_space=space,
+        ...     evaluator=evaluator,
+        ...     strategy_configs=configs
+        ... )
+        >>>
+        >>> best = optimizer.search(budget=500)
+    """
+
+    def __init__(
+        self,
+        search_space: SearchSpace,
+        evaluator: Callable[[ModelGraph], float],
+        strategy_configs: Dict[str, Dict[str, Any]],
+        selector_type: str = "ucb",
+        selector_config: Optional[Dict[str, Any]] = None,
+    ):
+        """Initialize adaptive optimizer."""
+        self.search_space = search_space
+        self.evaluator = evaluator
+        self.strategy_configs = strategy_configs
+
+        # Create strategy selector
+        strategies = list(strategy_configs.keys())
+        selector_config = selector_config or {}
+
+        if selector_type == "ucb":
+            from morphml.meta_learning.strategy_evolution.bandit import UCBSelector
+
+            self.selector = UCBSelector(strategies, **selector_config)
+        elif selector_type == "thompson":
+            from morphml.meta_learning.strategy_evolution.bandit import ThompsonSamplingSelector
+
+            self.selector = ThompsonSamplingSelector(strategies, **selector_config)
+        elif selector_type == "epsilon":
+            from morphml.meta_learning.strategy_evolution.bandit import EpsilonGreedySelector
+
+            self.selector = EpsilonGreedySelector(strategies, **selector_config)
+        else:
+            raise ValueError(f"Unknown selector type: {selector_type}")
+
+        # Track progress
+        self.history = []
+        self.best_fitness = -float("inf")
+        self.best_architecture = None
+
+        logger.info(
+            f"Initialized AdaptiveOptimizer with {len(strategies)} strategies: {strategies}"
+        )
+
+    def search(self, budget: int = 500, checkpoint_interval: int = 50) -> ModelGraph:
+        """
+        Run adaptive search.
+
+        Args:
+            budget: Total number of evaluations
+            checkpoint_interval: How often to report progress
+
+        Returns:
+            Best architecture found
+        """
+        logger.info(f"Starting adaptive search with budget={budget}")
+
+        evaluations_used = 0
+
+        while evaluations_used < budget:
+            # Select strategy
+            strategy = self.selector.select_strategy()
+
+            logger.info(f"Evaluations: {evaluations_used}/{budget}, Using strategy: {strategy}")
+
+            # Run strategy for a batch
+            batch_size = min(50, budget - evaluations_used)
+            batch_results = self._run_strategy_batch(strategy, batch_size)
+
+            # Track results
+            self.history.extend(batch_results)
+            evaluations_used += len(batch_results)
+
+            # Compute reward (improvement in best fitness)
+            prev_best = self.best_fitness
+            for arch, fitness in batch_results:
+                if fitness > self.best_fitness:
+                    self.best_fitness = fitness
+                    self.best_architecture = arch
+
+            improvement = self.best_fitness - prev_best
+
+            # Update selector
+            self.selector.update(strategy, reward=improvement)
+
+            # Checkpoint
+            if evaluations_used % checkpoint_interval == 0:
+                stats = self.selector.get_statistics()
+                logger.info(
+                    f"Progress: {evaluations_used}/{budget}, "
+                    f"Best fitness: {self.best_fitness:.4f}"
+                )
+                logger.info(f"Strategy stats: {stats}")
+
+        # Final summary
+        stats = self.selector.get_statistics()
+        best_strategy = self.selector.get_best_strategy()
+
+        logger.info("Search complete!")
+        logger.info(f"Best strategy: {best_strategy}")
+        logger.info(f"Final statistics: {stats}")
+        logger.info(f"Best fitness: {self.best_fitness:.4f}")
+
+        return self.best_architecture
+
+    def _run_strategy_batch(self, strategy: str, batch_size: int) -> List[tuple]:
+        """
+        Run a strategy for a batch of evaluations.
+
+        Args:
+            strategy: Strategy name
+            batch_size: Number of evaluations
+
+        Returns:
+            List of (architecture, fitness) tuples
+        """
+        results = []
+
+        # Create appropriate optimizer
+        config = self.strategy_configs[strategy]
+
+        if strategy == "random":
+            # Random sampling
+            for _ in range(batch_size):
+                arch = self.search_space.sample()
+                fitness = self.evaluator(arch)
+                results.append((arch, fitness))
+
+        elif strategy in ["ga", "genetic"]:
+            # Genetic algorithm
+            from morphml.optimizers import GeneticAlgorithm
+
+            # Run for mini-generations
+            mini_generations = batch_size // config.get("population_size", 20)
+            mini_generations = max(1, mini_generations)
+
+            ga = GeneticAlgorithm(
+                search_space=self.search_space,
+                evaluator=self.evaluator,
+                population_size=config.get("population_size", 20),
+                num_generations=mini_generations,
+                mutation_prob=config.get("mutation_prob", 0.2),
+                crossover_prob=config.get("crossover_prob", 0.8),
+            )
+
+            ga.search()
+
+            # Extract results
+            results = [(ind.architecture, ind.fitness) for ind in ga.history]
+
+        elif strategy in ["bo", "bayesian"]:
+            # Bayesian optimization (simplified)
+            # Just use best from random for now
+            for _ in range(batch_size):
+                arch = self.search_space.sample()
+                fitness = self.evaluator(arch)
+                results.append((arch, fitness))
+
+        else:
+            logger.warning(f"Unknown strategy {strategy}, using random")
+            for _ in range(batch_size):
+                arch = self.search_space.sample()
+                fitness = self.evaluator(arch)
+                results.append((arch, fitness))
+
+        return results[:batch_size]  # Ensure exact batch size
+
+    def get_search_trajectory(self) -> List[float]:
+        """
+        Get fitness trajectory over search.
+
+        Returns:
+            List of best fitness values over time
+        """
+        trajectory = []
+        best_so_far = -float("inf")
+
+        for _, fitness in self.history:
+            if fitness > best_so_far:
+                best_so_far = fitness
+            trajectory.append(best_so_far)
+
+        return trajectory
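
The reward that `search()` feeds back to the selector is the per-batch improvement in best fitness, so strategies that stop producing improvements are gradually abandoned. The following self-contained sketch reproduces that select/run/update loop on a toy problem without importing morphml; `ToyUCB`, the two fake "strategies", and the reward shaping are illustrative assumptions, and only the reward-as-improvement idea comes from the diff above.

```python
# Minimal sketch of the adaptive loop in adaptive_optimizer.py (hypothetical names).
import numpy as np

rng = np.random.default_rng(0)

class ToyUCB:
    """Tiny UCB selector mirroring the select/update interface used above."""
    def __init__(self, names, c=2.0):
        self.names, self.c = names, c
        self.counts = np.zeros(len(names))
        self.rewards = np.zeros(len(names))

    def select(self):
        total = self.counts.sum()
        if total < len(self.names):              # pull each arm once first
            return self.names[int(total)]
        mean = self.rewards / self.counts
        bonus = self.c * np.sqrt(np.log(total) / self.counts)
        return self.names[int(np.argmax(mean + bonus))]

    def update(self, name, reward):
        i = self.names.index(name)
        self.counts[i] += 1
        self.rewards[i] += reward

# Two fake "optimizers": one samples better candidates than the other.
strategies = {
    "random": lambda n: rng.normal(0.5, 0.1, n),
    "ga": lambda n: rng.normal(0.7, 0.1, n),
}

selector = ToyUCB(list(strategies))
best = 0.0
for _ in range(20):                              # 20 batches of 10 evaluations
    name = selector.select()
    batch = strategies[name](10)
    improvement = max(batch.max() - best, 0.0)   # reward = gain in best fitness
    best = max(best, batch.max())
    selector.update(name, improvement)

print("best fitness:", round(best, 3))
print("pulls per strategy:", dict(zip(selector.names, selector.counts)))
```

Because the "ga" arm samples from a better distribution, the selector should concentrate its pulls there after the first few batches, which is exactly the behavior `AdaptiveOptimizer` relies on.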
--- /dev/null
+++ morphml/meta_learning/strategy_evolution/bandit.py
@@ -0,0 +1,276 @@
+"""Multi-armed bandit algorithms for strategy selection.
+
+Author: Eshan Roy <eshanized@proton.me>
+Organization: TONMOY INFRASTRUCTURE & VISION
+"""
+
+from abc import ABC, abstractmethod
+from typing import Dict, List
+
+import numpy as np
+
+from morphml.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class StrategySelector(ABC):
+    """
+    Base class for strategy selection algorithms.
+
+    Subclasses implement different bandit algorithms:
+    - UCB (Upper Confidence Bound)
+    - Thompson Sampling
+    - Epsilon-Greedy
+    """
+
+    def __init__(self, strategies: List[str]):
+        """
+        Initialize selector.
+
+        Args:
+            strategies: List of strategy names
+        """
+        self.strategies = strategies
+        self.num_strategies = len(strategies)
+
+        # Statistics
+        self.counts = np.zeros(self.num_strategies)
+        self.rewards = np.zeros(self.num_strategies)
+
+        logger.info(f"Initialized {self.__class__.__name__} with {self.num_strategies} strategies")
+
+    @abstractmethod
+    def select_strategy(self) -> str:
+        """Select a strategy to use."""
+        pass
+
+    def update(self, strategy: str, reward: float) -> None:
+        """
+        Update statistics after using a strategy.
+
+        Args:
+            strategy: Strategy that was used
+            reward: Reward obtained (e.g., fitness improvement)
+        """
+        try:
+            idx = self.strategies.index(strategy)
+        except ValueError:
+            logger.warning(f"Unknown strategy: {strategy}")
+            return
+
+        self.counts[idx] += 1
+        self.rewards[idx] += reward
+
+        logger.debug(
+            f"Updated {strategy}: pulls={self.counts[idx]:.0f}, "
+            f"avg_reward={self.rewards[idx]/self.counts[idx]:.4f}"
+        )
+
+    def get_statistics(self) -> Dict[str, Dict[str, float]]:
+        """
+        Get statistics for all strategies.
+
+        Returns:
+            Dict mapping strategy name to statistics
+        """
+        stats = {}
+
+        for i, strategy in enumerate(self.strategies):
+            if self.counts[i] > 0:
+                stats[strategy] = {
+                    "pulls": int(self.counts[i]),
+                    "total_reward": float(self.rewards[i]),
+                    "avg_reward": float(self.rewards[i] / self.counts[i]),
+                }
+            else:
+                stats[strategy] = {
+                    "pulls": 0,
+                    "total_reward": 0.0,
+                    "avg_reward": 0.0,
+                }
+
+        return stats
+
+    def get_best_strategy(self) -> str:
+        """Get strategy with highest average reward."""
+        mean_rewards = self.rewards / (self.counts + 1e-8)
+        best_idx = np.argmax(mean_rewards)
+        return self.strategies[best_idx]
+
+
+class UCBSelector(StrategySelector):
+    """
+    Upper Confidence Bound (UCB) strategy selector.
+
+    UCB balances exploration and exploitation using:
+        UCB(i) = mean_reward(i) + c * sqrt(log(total) / pulls(i))
+
+    Args:
+        strategies: List of strategy names
+        exploration_factor: Exploration constant (default: 2.0)
+
+    Example:
+        >>> selector = UCBSelector(['GA', 'BO', 'DE'])
+        >>> strategy = selector.select_strategy()
+        >>> # ... run strategy ...
+        >>> selector.update(strategy, reward=0.15)
+    """
+
+    def __init__(self, strategies: List[str], exploration_factor: float = 2.0):
+        """Initialize UCB selector."""
+        super().__init__(strategies)
+        self.exploration_factor = exploration_factor
+
+    def select_strategy(self) -> str:
+        """
+        Select strategy using UCB algorithm.
+
+        Returns:
+            Selected strategy name
+        """
+        total_pulls = self.counts.sum()
+
+        # Pull each arm at least once
+        if total_pulls < self.num_strategies:
+            idx = int(total_pulls)
+            return self.strategies[idx]
+
+        # Compute UCB scores
+        mean_rewards = self.rewards / (self.counts + 1e-8)
+        exploration_bonus = self.exploration_factor * np.sqrt(
+            np.log(total_pulls) / (self.counts + 1e-8)
+        )
+
+        ucb_scores = mean_rewards + exploration_bonus
+
+        # Select strategy with highest UCB
+        best_idx = np.argmax(ucb_scores)
+
+        logger.debug(
+            f"UCB scores: {dict(zip(self.strategies, ucb_scores))}, "
+            f"selected: {self.strategies[best_idx]}"
+        )
+
+        return self.strategies[best_idx]
+
+
+class ThompsonSamplingSelector(StrategySelector):
+    """
+    Thompson Sampling strategy selector.
+
+    Uses a Beta distribution for each strategy:
+        Beta(alpha, beta) where alpha = successes, beta = failures
+
+    Args:
+        strategies: List of strategy names
+        prior_alpha: Prior alpha (pseudo-successes)
+        prior_beta: Prior beta (pseudo-failures)
+
+    Example:
+        >>> selector = ThompsonSamplingSelector(['GA', 'BO'])
+        >>> strategy = selector.select_strategy()
+    """
+
+    def __init__(self, strategies: List[str], prior_alpha: float = 1.0, prior_beta: float = 1.0):
+        """Initialize Thompson Sampling selector."""
+        super().__init__(strategies)
+        self.prior_alpha = prior_alpha
+        self.prior_beta = prior_beta
+
+        # Track successes and failures
+        self.successes = np.ones(self.num_strategies) * prior_alpha
+        self.failures = np.ones(self.num_strategies) * prior_beta
+
+    def select_strategy(self) -> str:
+        """
+        Select strategy using Thompson Sampling.
+
+        Returns:
+            Selected strategy name
+        """
+        # Sample from Beta distribution for each strategy
+        samples = np.random.beta(self.successes, self.failures)
+
+        # Select strategy with highest sample
+        best_idx = np.argmax(samples)
+
+        logger.debug(
+            f"Thompson samples: {dict(zip(self.strategies, samples))}, "
+            f"selected: {self.strategies[best_idx]}"
+        )
+
+        return self.strategies[best_idx]
+
+    def update(self, strategy: str, reward: float) -> None:
+        """
+        Update with reward (interpreted as success probability).
+
+        Args:
+            strategy: Strategy used
+            reward: Reward in [0, 1] (treated as success probability)
+        """
+        super().update(strategy, reward)
+
+        try:
+            idx = self.strategies.index(strategy)
+        except ValueError:
+            return
+
+        # Interpret reward as success probability
+        # Use reward to update Beta distribution
+        self.successes[idx] += reward
+        self.failures[idx] += 1.0 - reward
+
+
+class EpsilonGreedySelector(StrategySelector):
+    """
+    Epsilon-Greedy strategy selector.
+
+    With probability epsilon, explore (random choice).
+    With probability 1 - epsilon, exploit (best strategy).
+
+    Args:
+        strategies: List of strategy names
+        epsilon: Exploration probability (default: 0.1)
+        epsilon_decay: Decay rate for epsilon (default: 0.99)
+    """
+
+    def __init__(self, strategies: List[str], epsilon: float = 0.1, epsilon_decay: float = 0.99):
+        """Initialize Epsilon-Greedy selector."""
+        super().__init__(strategies)
+        self.epsilon = epsilon
+        self.initial_epsilon = epsilon
+        self.epsilon_decay = epsilon_decay
+
+    def select_strategy(self) -> str:
+        """
+        Select strategy using epsilon-greedy.
+
+        Returns:
+            Selected strategy name
+        """
+        # Explore
+        if np.random.rand() < self.epsilon:
+            idx = np.random.randint(self.num_strategies)
+            logger.debug(f"Exploring: selected {self.strategies[idx]}")
+            return self.strategies[idx]
+
+        # Exploit
+        mean_rewards = self.rewards / (self.counts + 1e-8)
+        best_idx = np.argmax(mean_rewards)
+
+        logger.debug(f"Exploiting: selected {self.strategies[best_idx]}")
+
+        return self.strategies[best_idx]
+
+    def update(self, strategy: str, reward: float) -> None:
+        """Update and decay epsilon."""
+        super().update(strategy, reward)
+
+        # Decay epsilon
+        self.epsilon *= self.epsilon_decay
+
+    def reset_epsilon(self) -> None:
+        """Reset epsilon to initial value."""
+        self.epsilon = self.initial_epsilon
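
All three selectors share the `select_strategy`/`update`/`get_statistics` interface defined on `StrategySelector`, so they can be compared head to head. A minimal driver sketch follows; it assumes the wheel is installed and importable, and the Bernoulli arm probabilities are made up for illustration (the import path itself is the one used by adaptive_optimizer.py above).

```python
# Compare the three selectors on stationary Bernoulli arms (illustrative only).
import numpy as np
from morphml.meta_learning.strategy_evolution.bandit import (
    EpsilonGreedySelector,
    ThompsonSamplingSelector,
    UCBSelector,
)

rng = np.random.default_rng(42)
arms = {"GA": 0.7, "BO": 0.5, "DE": 0.3}  # hidden success probabilities (assumed)

for selector in (
    UCBSelector(list(arms)),
    ThompsonSamplingSelector(list(arms)),
    EpsilonGreedySelector(list(arms), epsilon=0.2),
):
    for _ in range(300):
        s = selector.select_strategy()
        # Binary reward in {0, 1}; also valid for Thompson's Beta update.
        selector.update(s, reward=float(rng.random() < arms[s]))
    best = selector.get_best_strategy()
    print(type(selector).__name__, "->", best,
          selector.get_statistics()[best]["pulls"], "pulls")
```

With rewards bounded in [0, 1] all three should identify "GA" as the best arm; note that `AdaptiveOptimizer` instead passes unbounded fitness improvements as rewards, which suits UCB and epsilon-greedy but only matches Thompson Sampling's Beta update when improvements stay within [0, 1].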