evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,740 @@
1
+ """
2
+ Minimisation function for EvoGrad optimisation.
3
+
4
+ This module provides the main entry point for running optimisation,
5
+ following pymoo's interface style where algorithm initialisation
6
+ happens inside the minimize function.
7
+
8
+ Example:
9
+ >>> from evograd.core.problem import Problem
10
+ >>> from evograd.core.minimize import minimize
11
+ >>> from evograd.core.termination import MaxEvaluations
12
+ >>> from evograd.algorithms import GA
13
+ >>>
14
+ >>> # Define problem
15
+ >>> problem = Problem(
16
+ ... objective=lambda x: (x**2).sum(dim=-1),
17
+ ... n_var=30,
18
+ ... xl=-100.0,
19
+ ... xu=100.0,
20
+ ... )
21
+ >>>
22
+ >>> # Create algorithm (not initialized)
23
+ >>> algorithm = GA(pop_size=100, eliminate_duplicates=True)
24
+ >>>
25
+ >>> # Run optimisation
26
+ >>> result = minimize(
27
+ ... problem,
28
+ ... algorithm,
29
+ ... termination=MaxEvaluations(10000),
30
+ ... seed=42,
31
+ ... verbose=True,
32
+ ... )
33
+ >>>
34
+ >>> print(f"Best fitness: {result.best_fitness}")
35
+ >>> print(f"Best solution: {result.best_solution}")
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import time
41
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
42
+
43
+ import torch
44
+
45
+ from evograd.core.result import Result, ResultBuilder
46
+ from evograd.core.termination import (
47
+ Termination,
48
+ TerminationCollection,
49
+ TargetReached,
50
+ default_termination,
51
+ )
52
+ from evograd.utils.callbacks import (
53
+ Callback,
54
+ CallbackList,
55
+ CallbackState,
56
+ HistoryCallback,
57
+ PrintCallback,
58
+ )
59
+ from evograd.utils.device import set_seed
60
+
61
+ if TYPE_CHECKING:
62
+ from evograd.core.algorithm import Algorithm
63
+ from evograd.core.problem import Problem
64
+
65
+ __all__ = [
66
+ "minimize",
67
+ ]
68
+
69
# Per-algorithm optimiser defaults, keyed by the algorithm's class name.
# ``_resolve_opt_defaults`` substitutes these values when a caller passes the
# ``-1`` sentinel, and falls back to the "GA" entry for unlisted algorithms.
_OPT_DEFAULTS = {
    "GA": {
        "lr_pop": 3e-4,
        "lr_hyper": 0.001,
        "grad_clip_pop": 0.2,
        "grad_clip_hyper": 0.2,
        "pop_momentum": 0.0,
    },
    "DE": {
        "lr_pop": 0.01,
        "lr_hyper": 0.001,
        "grad_clip_pop": 0.5,
        "grad_clip_hyper": 0.3,
        "pop_momentum": 0.9,
    },
    "PSO": {
        "lr_pop": 0.001,
        "lr_hyper": 0.001,
        "grad_clip_pop": 1.0,
        "grad_clip_hyper": 0.1,
        "pop_momentum": 0.9,
    },
    "CMAES": {
        "lr_pop": 0.003,
        "lr_hyper": 0.0003,
        "grad_clip_pop": 0.5,
        "grad_clip_hyper": 0.1,
        "pop_momentum": 0.9,
    },
}
75
+
76
def minimize(
    problem: Problem,
    algorithm: Algorithm,
    termination: Optional[Termination] = None,
    seed: Optional[int] = None,
    verbose: bool = True,
    callback: Optional[Union[Callback, List[Callback]]] = None,
    copy_algorithm: bool = False,
    save_history: bool = True,
    initialize: bool = True,
    # Differentiable mode options
    optimizer: Optional[
        Union[torch.optim.Optimizer, List[torch.optim.Optimizer]]
    ] = None,
    lr_pop: Optional[float] = None,
    lr_hyper: Optional[float] = None,
    grad_clip_pop: Optional[float] = None,
    grad_clip_hyper: Optional[float] = None,
    scheduler: Optional[str] = None,
    scheduler_patience: int = 50,
    scheduler_factor: float = 0.5,
    min_lr: float = 1e-6,
) -> Result:
    """
    Minimise an objective function using a population-based algorithm.

    This function initialises the algorithm with the problem and runs
    the optimisation loop until termination criteria are met. Follows
    pymoo's interface style.

    Differentiable Mode
    -------------------
    EvoGrad automatically detects learnable parameters (nn.Parameter with
    requires_grad=True) and uses backpropagation to update them. This covers:

    - Population updates: algorithm.differentiable=True
    - Operator hyperparameters: operator.differentiable=True
    - Adaptive PSO coefficients: adaptive=True (w, c1, c2 per particle)
    - Any other learnable parameters in the algorithm

    Thus, EvoGrad supports four combinations of differentiability:

    1. algorithm.differentiable=False, operators.differentiable=False
       → Pure classical EA, no backpropagation

    2. algorithm.differentiable=False, operators.differentiable=True
       → Classical EA dynamics, but learn operator hyperparameters
         (e.g., crossover eta, mutation rate, PSO w/c1/c2)

    3. algorithm.differentiable=True, operators.differentiable=False
       → Gradient-based population updates (local search), fixed operators

    4. algorithm.differentiable=True, operators.differentiable=True
       → Full end-to-end differentiable optimisation

    Args:
        problem: Problem instance defining the objective function,
            bounds, and constraints.
        algorithm: Algorithm instance (e.g., GA, DE, PSO, CMAES).
            Will be initialized inside this function.
        termination: When to stop optimisation. Must be a Termination
            instance (e.g., MaxEvaluations(10000)). If None, the value
            returned by ``default_termination()`` is used.
        seed: Random seed for reproducibility. Applied before
            algorithm initialisation.
        verbose: If True (default), print progress during optimisation
            (a ``PrintCallback(every=1)`` is added unless one is supplied).
        callback: Single Callback or list of Callbacks for monitoring.
            A HistoryCallback is added automatically when
            ``save_history`` is True.
        copy_algorithm: If True, create a deep copy of the algorithm to
            preserve the original. Default False.
        save_history: If True (default), save convergence history
            in result. Set False to reduce memory for long runs.
        initialize: If True (default), initialize the algorithm with the
            problem. Set to False to continue optimization with an already
            initialized algorithm (e.g., when switching problems at runtime
            while preserving population state and hyperparameters).
            The algorithm must have been previously initialized. When False,
            the termination budget is additive (e.g., MaxEvaluations(500)
            will run 500 more evaluations from the current state).

        # Differentiable mode options (used if learnable params exist):
        optimizer: A PyTorch optimizer, or a list/tuple of optimizers, for
            gradient-based updates. If None, an SGD optimizer is created for
            the population (when the resolved ``lr_pop`` is positive) and an
            Adam optimizer for all other learnable parameters (when the
            resolved ``lr_hyper`` is positive).
        lr_pop: Learning rate for gradient-based updates of the population.
            ``None`` (default) creates no population optimizer; ``-1``
            selects the per-algorithm value from ``_OPT_DEFAULTS``. A
            positive value is divided by ``sqrt(problem.n_var)`` before use.
        lr_hyper: Learning rate for gradient-based updates of the
            hyperparameters. ``None`` (default) creates no hyperparameter
            optimizer; ``-1`` selects the per-algorithm default.
        grad_clip_pop: Maximum gradient norm for clipping population gradient
            (None = no clipping, -1 = per-algorithm default).
        grad_clip_hyper: Maximum gradient norm for clipping hyperparam gradient
            (None = no clipping, -1 = per-algorithm default).
        scheduler: Learning rate scheduler type:
            - 'plateau': Reduce on plateau
            - 'step': Reduce every N generations
            - 'cosine': Cosine annealing
            - 'exponential': Exponential decay
            - None: No scheduler (default)
        scheduler_patience: Generations without improvement before
            reducing LR (for 'plateau' scheduler).
        scheduler_factor: Factor to multiply LR when reducing.
        min_lr: Minimum learning rate.

    Returns:
        Result object containing:
        - best_solution: Best solution found
        - best_fitness: Best fitness value
        - population: Final population
        - fitness: Final fitness values
        - n_evals: Total evaluations
        - n_gen: Total generations
        - history: Convergence history (if save_history=True)
        - success: Whether target was reached

    Raises:
        TypeError: If ``termination`` is neither None nor a Termination.
        ValueError: If ``initialize=False`` is used with an algorithm that
            has no ``generation`` attribute or is still at generation 0.

    Example:
        >>> # Basic usage (classical EA)
        >>> result = minimize(problem, GA(pop_size=100), seed=42)
        >>>
        >>> # Learn operator hyperparameters with classical dynamics
        >>> from evograd.operators import SBX, PolynomialMutation
        >>> algorithm = GA(
        ...     pop_size=100,
        ...     crossover=SBX(eta=15, differentiable=True),
        ...     mutation=PolynomialMutation(eta=20, differentiable=True),
        ...     differentiable=False,  # Population not updated via gradients
        ... )
        >>> result = minimize(problem, algorithm, termination=MaxEvaluations(10000))
        >>>
        >>> # Full differentiable mode
        >>> algorithm = GA(pop_size=100, differentiable=True)
        >>> result = minimize(problem, algorithm, lr_pop=0.01, grad_clip_pop=1.0)
        >>>
        >>> # Continue optimization with a different problem (e.g., surrogate -> true)
        >>> # First optimize with surrogate problem
        >>> pso = PSO(pop_size=100, differentiable=True)
        >>> result1 = minimize(surrogate_problem, pso, termination=MaxEvaluations(10000))
        >>> # Then continue with true problem (preserves velocities, personal bests)
        >>> result2 = minimize(true_problem, pso, termination=MaxEvaluations(500),
        ...                    initialize=False)

    Note:
        By default (initialize=True), the algorithm is initialized inside this
        function. Do not call algorithm.initialize() before passing to minimize().

        When initialize=False, the algorithm must have been previously initialized
        (e.g., from a prior minimize() call). This allows switching problems at
        runtime while preserving population state, velocities, and personal bests.
        In that mode the MaxEvaluations / MaxGenerations budgets of the passed
        ``termination`` object are increased in place, so pass a fresh
        termination instance to every continued run.
    """
    # -------------------------------------------------------------------------
    # Setup
    # -------------------------------------------------------------------------

    # Set seed first for reproducibility
    if seed is not None:
        set_seed(seed)

    # Copy algorithm if requested
    if copy_algorithm:
        import copy
        algorithm = copy.deepcopy(algorithm)

    # Parse termination criteria
    termination = _parse_termination(termination)

    # Setup callbacks
    callbacks = _setup_callbacks(callback, verbose, save_history)

    # Initialize algorithm with problem (or continue with existing state)
    if initialize:
        algorithm.initialize(problem)
    else:
        # Continue with existing algorithm state but update problem reference
        # This preserves population, velocities, personal bests, etc.
        if not hasattr(algorithm, 'generation') or algorithm.generation == 0:
            raise ValueError(
                "initialize=False requires a previously initialized algorithm. "
                "Run minimize() with initialize=True first."
            )
        # Update problem reference and bounds
        algorithm.problem = problem
        algorithm.xl = problem.xl
        algorithm.xu = problem.xu

        # Re-evaluate the current population on the new problem so that
        # fitness values (including personal bests in PSO) are consistent
        # with the new objective. Without this, stale fitness values from
        # the old problem prevent the algorithm from accepting any new
        # solutions (e.g., surrogate fitness ~0.003 vs ODE fitness ~200).
        with torch.no_grad():
            new_fitness = algorithm._evaluate(algorithm.population)
            algorithm.state.fitness = new_fitness
            algorithm.state.best_fitness = float('inf')
            algorithm.state.update_best(algorithm.population, new_fitness)

            # PSO: re-evaluate personal bests on the new problem
            if hasattr(algorithm, '_p_best') and hasattr(algorithm, '_p_best_fitness'):
                pb_fitness = algorithm._evaluate(algorithm._p_best)
                algorithm._p_best_fitness.copy_(pb_fitness)

        # Update termination budget to add to existing evaluations/generations
        # (mutates the termination object in place).
        _update_termination_budget(termination, algorithm)

    # Setup result builder
    builder = ResultBuilder()
    builder.set_problem(problem)
    builder.set_algorithm(algorithm)

    # -------------------------------------------------------------------------
    # Setup differentiable mode
    # -------------------------------------------------------------------------

    # Split learnable parameters (requires_grad=True) into the population
    # tensor and everything else, so each group gets its own optimizer and
    # its own gradient-clipping threshold.
    pop_params = []
    hyper_params = []

    for name, p in algorithm.named_parameters():
        if not p.requires_grad:
            continue
        if name == "_population":
            pop_params.append(p)
        else:
            hyper_params.append(p)

    use_backprop = (len(pop_params) > 0) or (len(hyper_params) > 0)

    lr_pop_eff, lr_hyper_eff, grad_clip_pop, grad_clip_hyper, defaults = _resolve_opt_defaults(
        algorithm, problem, lr_pop, lr_hyper, grad_clip_pop, grad_clip_hyper)

    optimizers: List[torch.optim.Optimizer] = []
    schedulers: List[Optional[torch.optim.lr_scheduler.LRScheduler]] = []

    if use_backprop:
        # Create optimizer if not provided
        if optimizer is None:
            if len(pop_params) > 0 and isinstance(lr_pop_eff, (int, float)) and lr_pop_eff > 0:
                optimizers.append(torch.optim.SGD(pop_params, lr=lr_pop_eff, momentum=defaults["pop_momentum"]))

            if len(hyper_params) > 0 and isinstance(lr_hyper_eff, (int, float)) and lr_hyper_eff > 0:
                optimizers.append(torch.optim.Adam(hyper_params, lr=lr_hyper_eff))

        else:
            # Accept a single optimizer or a list/tuple of optimizers
            if isinstance(optimizer, (list, tuple)):
                optimizers.extend(list(optimizer))
            else:
                optimizers.append(optimizer)

        # Create one LR scheduler slot per optimizer (None when disabled)
        est_gens = _estimate_total_generations(termination, algorithm)
        for opt in optimizers:
            schedulers.append(
                _create_scheduler(
                    opt,
                    scheduler,
                    scheduler_patience,
                    scheduler_factor,
                    min_lr,
                    total_generations=est_gens,
                )
            )

    # -------------------------------------------------------------------------
    # Create callback state
    # -------------------------------------------------------------------------

    state = CallbackState(
        generation=algorithm.generation,
        n_evals=algorithm.n_evals,
        max_evals=getattr(termination, 'max_evals', None),
        max_generations=getattr(termination, 'max_gens', None),
        best_fitness=algorithm.best_fitness,
        best_solution=algorithm.best_solution,
        current_fitness=algorithm.fitness,
        current_population=algorithm.population,
        algorithm=algorithm,
        hyperparams=algorithm._get_hyperparams(),
    )

    # -------------------------------------------------------------------------
    # Optimisation loop
    # -------------------------------------------------------------------------

    builder.start()
    start_time = time.perf_counter()

    # Notify callbacks of start
    _call_callbacks(callbacks, "on_optimisation_start", state)

    # Reset termination state
    termination.reset()

    while not termination.should_terminate(algorithm):
        # Check callback early stopping
        if state.stop_optimisation:
            break

        # Generation start callback
        _call_callbacks(callbacks, "on_generation_start", state)

        # Run one generation
        if use_backprop:
            _step_differentiable(
                algorithm,
                optimizers,
                schedulers,
                pop_params,
                hyper_params,
                grad_clip_pop,
                grad_clip_hyper,
            )
        else:
            algorithm.step()

        # Update callback state
        state.generation = algorithm.generation
        state.n_evals = algorithm.n_evals
        state.best_fitness = algorithm.best_fitness
        state.best_solution = algorithm.best_solution
        state.current_fitness = algorithm.fitness
        state.current_population = algorithm.population
        state.hyperparams = algorithm._get_hyperparams()
        state.elapsed_time = time.perf_counter() - start_time

        # Generation end callback
        _call_callbacks(callbacks, "on_generation_end", state)

    # -------------------------------------------------------------------------
    # Finalize
    # -------------------------------------------------------------------------

    # Determine success (check if TargetReached was met)
    success = _check_target_reached(termination, algorithm)

    # Build result
    builder.finish(algorithm, termination, success)

    # Get history from callbacks
    if save_history:
        history = _collect_history(callbacks)
        builder.set_history(history)

    result = builder.build()

    # Final callback
    _call_callbacks(callbacks, "on_optimisation_end", state)

    return result
422
+
423
+
424
+ # =============================================================================
425
+ # Helper Functions
426
+ # =============================================================================
427
+
428
def _resolve_opt_defaults(
    algorithm: "Algorithm",
    problem: "Problem",
    lr_pop,
    lr_hyper,
    grad_clip_pop,
    grad_clip_hyper,
):
    """Resolve the optimiser settings for ``algorithm`` on ``problem``.

    Sentinel convention for each of the four tunables:
        - ``-1``: substitute the per-algorithm entry from ``_OPT_DEFAULTS``
          (looked up by the algorithm's class name, falling back to "GA").
        - ``None``: left untouched; ``minimize()`` treats it as "disabled"
          (no optimiser is created / no clipping is applied).
        - any other value: used as given.

    A positive numeric population learning rate is additionally divided by
    ``sqrt(problem.n_var)``.

    Returns:
        Tuple ``(lr_pop_eff, lr_hyper, grad_clip_pop, grad_clip_hyper,
        defaults)`` where ``defaults`` is the selected ``_OPT_DEFAULTS`` entry.
    """
    defaults = _OPT_DEFAULTS.get(algorithm.__class__.__name__, _OPT_DEFAULTS["GA"])

    def _or_default(value, key):
        # Only the -1 sentinel is replaced; None and explicit values pass through.
        return defaults[key] if value == -1 else value

    lr_pop = _or_default(lr_pop, "lr_pop")
    lr_hyper = _or_default(lr_hyper, "lr_hyper")
    grad_clip_pop = _or_default(grad_clip_pop, "grad_clip_pop")
    grad_clip_hyper = _or_default(grad_clip_hyper, "grad_clip_hyper")

    # Dimension scaling applies only to a strictly positive numeric rate;
    # None / 0 / anything else is handed back unchanged.
    scalable = isinstance(lr_pop, (int, float)) and lr_pop > 0
    lr_pop_eff = lr_pop / (problem.n_var ** 0.5) if scalable else lr_pop

    return lr_pop_eff, lr_hyper, grad_clip_pop, grad_clip_hyper, defaults
465
+
466
def _parse_termination(termination: Optional[Termination]) -> Termination:
    """
    Normalise the ``termination`` argument of ``minimize``.

    Args:
        termination: A Termination instance, or None to request the default.

    Returns:
        The given instance unchanged, or ``default_termination()`` for None.

    Raises:
        TypeError: If ``termination`` is neither None nor a Termination.
    """
    if isinstance(termination, Termination):
        return termination

    if termination is None:
        return default_termination()

    got = type(termination).__name__
    raise TypeError(
        "termination must be a Termination instance or None. "
        f"Got {got}. "
        "Example: termination=MaxEvaluations(10000)"
    )
487
+
488
+
489
def _update_termination_budget(
    termination: Termination,
    algorithm: "Algorithm",
) -> None:
    """
    Update termination budget when continuing optimization (initialize=False).

    Adds the algorithm's current evaluation/generation counts to every
    MaxEvaluations / MaxGenerations criterion, so the new budget is additive
    rather than absolute. TerminationCollection instances are walked
    recursively, so criteria inside nested collections are extended too.
    Other criterion types are left untouched.

    The termination objects are modified in place.

    Args:
        termination: The termination criterion (or collection) to update.
        algorithm: The algorithm with current evaluation/generation counts.
    """
    from evograd.core.termination import (
        MaxEvaluations,
        MaxGenerations,
        TerminationCollection,
    )

    # Identity guard: a criterion object reachable through more than one
    # collection must have its budget extended exactly once.
    visited = set()

    def _extend(term: Termination) -> None:
        if id(term) in visited:
            return
        visited.add(id(term))

        if isinstance(term, TerminationCollection):
            for criterion in term.criteria:
                _extend(criterion)
        elif isinstance(term, MaxEvaluations):
            # Add current evaluations to budget
            term.max_evals += algorithm.n_evals
        elif isinstance(term, MaxGenerations):
            # Add current generations to budget
            term.max_gens += algorithm.generation

    _extend(termination)
523
+
524
+
525
def _setup_callbacks(
    callback: Optional[Union[Callback, List[Callback], CallbackList]],
    verbose: bool,
    save_history: bool,
) -> List[Callback]:
    """Build the list of callbacks used for one ``minimize`` run.

    Order of the result: a fresh HistoryCallback (only if ``save_history``),
    then the user callbacks, then a ``PrintCallback(every=1)`` (only if
    ``verbose`` and no PrintCallback is already present).

    Args:
        callback: None, a single callback, a list or tuple of callbacks, or
            a CallbackList (its ``callbacks`` are unpacked).
        verbose: Whether progress printing is requested.
        save_history: Whether convergence history should be recorded.

    Returns:
        A new list; the caller's own list/tuple is never modified.
    """
    callbacks: List[Callback] = []

    # History recorder goes first so it is present whenever history is saved.
    if save_history:
        callbacks.append(HistoryCallback())

    # Add user callbacks. Tuples are unpacked like lists; otherwise a tuple
    # would be stored as a single "callback" and fail on the first hook call.
    if callback is not None:
        if isinstance(callback, (list, tuple)):
            callbacks.extend(callback)
        elif isinstance(callback, CallbackList):
            callbacks.extend(callback.callbacks)
        else:
            callbacks.append(callback)

    # Add print callback if verbose (and not already present)
    if verbose and not any(isinstance(cb, PrintCallback) for cb in callbacks):
        callbacks.append(PrintCallback(every=1))

    return callbacks
553
+
554
+
555
def _call_callbacks(callbacks: List[Callback], method: str, state: CallbackState) -> None:
    """Invoke the hook named ``method`` on each callback, in order, passing ``state``."""
    for listener in callbacks:
        hook = getattr(listener, method)
        hook(state)
559
+
560
+
561
def _collect_history(callbacks: List[Callback]) -> Dict[str, List[Any]]:
    """Return ``to_dict()`` of the first HistoryCallback in ``callbacks``, or ``{}`` if none."""
    recorder = next(
        (cb for cb in callbacks if isinstance(cb, HistoryCallback)),
        None,
    )
    if recorder is None:
        return {}
    return recorder.to_dict()
567
+
568
+
569
def _check_target_reached(termination: Termination, algorithm: Algorithm) -> bool:
    """Report whether any TargetReached criterion is satisfied.

    ``termination`` may be a TargetReached itself or a TerminationCollection;
    collections are searched recursively, so TargetReached criteria inside
    nested collections are found as well. A criterion is satisfied when
    ``algorithm.best_fitness`` is ``<=`` its ``target_fitness`` (when
    ``criterion.minimize`` is truthy) or ``>=`` it (otherwise).

    Args:
        termination: The termination criterion used for the run.
        algorithm: The algorithm whose ``best_fitness`` is compared.

    Returns:
        True if some TargetReached criterion is met, otherwise False
        (including when no TargetReached criterion exists). Always a plain
        ``bool``, even if ``best_fitness`` is a tensor or numpy scalar.
    """

    def _met(term: Termination) -> bool:
        if isinstance(term, TargetReached):
            best = algorithm.best_fitness
            if term.minimize:
                return bool(best <= term.target_fitness)
            return bool(best >= term.target_fitness)

        if isinstance(term, TerminationCollection):
            return any(_met(criterion) for criterion in term.criteria)

        return False

    return _met(termination)
590
+
591
+
592
def _estimate_total_generations(termination: Termination, algorithm: "Algorithm") -> int:
    """
    Estimate the total number of generations from the termination criterion.

    Used to set ``T_max`` for the cosine-annealing scheduler so that the
    learning-rate schedule matches the actual optimisation budget.

    The first usable criterion wins: MaxGenerations yields ``max_gens``;
    MaxEvaluations yields ``max_evals // pop_size``. TerminationCollection
    instances are searched recursively in order.

    Returns:
        The estimate, never less than 1 (a zero ``T_max`` makes cosine
        annealing divide by zero). Falls back to 10 000 if no budget can be
        inferred.
    """
    from evograd.core.termination import MaxEvaluations, MaxGenerations, TerminationCollection

    def _extract(term: Termination) -> Optional[int]:
        if isinstance(term, TerminationCollection):
            for criterion in term.criteria:
                found = _extract(criterion)
                if found is not None:
                    return found
            return None
        if isinstance(term, MaxGenerations):
            return term.max_gens
        if isinstance(term, MaxEvaluations):
            # Guard against a zero/negative population size.
            pop = max(algorithm.pop_size, 1)
            return term.max_evals // pop
        return None

    estimate = _extract(termination)
    if estimate is None:
        return 10_000  # safe fallback

    # A budget smaller than one generation would otherwise give T_max == 0.
    return max(estimate, 1)
622
+
623
+
624
def _create_scheduler(
    optimizer: torch.optim.Optimizer,
    scheduler_type: Optional[str],
    patience: int,
    factor: float,
    min_lr: float,
    total_generations: int = 10_000,
) -> Optional[torch.optim.lr_scheduler.LRScheduler]:
    """Create a learning rate scheduler for ``optimizer``.

    Args:
        optimizer: The optimizer whose learning rate is scheduled.
        scheduler_type: Case-insensitive name: 'plateau', 'step', 'cosine'
            or 'exponential'. None disables scheduling.
        patience: 'plateau': generations without improvement before a
            reduction; 'step': the step size; 'exponential': the number of
            steps over which the rate decays by ``factor``.
        factor: Multiplicative reduction factor.
        min_lr: Lower bound on the learning rate. Only passed to the
            'plateau' (``min_lr``) and 'cosine' (``eta_min``) schedulers.
        total_generations: Estimated total generations for the optimisation
            run. Used as ``T_max`` for the cosine-annealing scheduler;
            values below 1 are raised to 1.

    Returns:
        The scheduler instance, or None when ``scheduler_type`` is None.

    Raises:
        ValueError: If ``scheduler_type`` is not one of the known names.
    """
    if scheduler_type is None:
        return None

    scheduler_type = scheduler_type.lower()
    lr_sched = torch.optim.lr_scheduler

    if scheduler_type == "plateau":
        return lr_sched.ReduceLROnPlateau(
            optimizer,
            mode="min",
            factor=factor,
            patience=patience,
            min_lr=min_lr,
        )

    if scheduler_type == "step":
        return lr_sched.StepLR(
            optimizer,
            step_size=patience,
            gamma=factor,
        )

    if scheduler_type == "cosine":
        return lr_sched.CosineAnnealingLR(
            optimizer,
            # T_max == 0 makes the scheduler divide by zero on its first step.
            T_max=max(total_generations, 1),
            eta_min=min_lr,
        )

    if scheduler_type == "exponential":
        return lr_sched.ExponentialLR(
            optimizer,
            # Per-step gamma chosen so that `patience` steps multiply LR by `factor`.
            gamma=factor ** (1.0 / patience),
        )

    raise ValueError(
        f"Unknown scheduler type: {scheduler_type}. "
        f"Use 'plateau', 'step', 'cosine', or 'exponential'."
    )
673
+
674
+
675
def _step_differentiable(
    algorithm: Algorithm,
    optimizers: List[torch.optim.Optimizer],
    schedulers: List[Optional[torch.optim.lr_scheduler.LRScheduler]],
    pop_params: Optional[List],
    hyper_params: Optional[List],
    grad_clip_pop: Optional[float],
    grad_clip_hyper: Optional[float],
) -> float:
    """
    Perform one generation step with gradient-based updates.

    Sequence: zero gradients, ``algorithm.forward()``, ``backward()`` on the
    returned loss, optional gradient-norm clipping per parameter group,
    optimizer steps, ``algorithm.update_state()``, then scheduler steps.

    Args:
        algorithm: The algorithm instance; must provide ``forward()``
            (returning the loss tensor) and ``update_state()``.
        optimizers: PyTorch optimizers to step (may be empty).
        schedulers: LR schedulers to step; None entries are skipped.
            ReduceLROnPlateau instances receive the loss value as metric.
        pop_params: Population parameters (clipped with ``grad_clip_pop``).
        hyper_params: Hyperparameter parameters (clipped with
            ``grad_clip_hyper``).
        grad_clip_pop: Maximum gradient norm for clipping the population
            gradient, or None for no clipping.
        grad_clip_hyper: Maximum gradient norm for clipping the hyperparam
            gradient, or None for no clipping.

    Returns:
        The loss returned by ``algorithm.forward()`` as a Python float.
    """
    # Make sure a differentiable population is tracked by autograd again.
    if algorithm.differentiable and isinstance(algorithm.population, torch.nn.Parameter):
        algorithm.population.requires_grad_(True)

    # Zero gradients
    for opt in optimizers:
        opt.zero_grad(set_to_none=True)

    # Forward pass (builds computation graph)
    loss = algorithm.forward()

    # Backward pass
    loss.backward()

    # Gradient clipping, applied separately to each parameter group
    if grad_clip_pop is not None and pop_params:
        torch.nn.utils.clip_grad_norm_(pop_params, grad_clip_pop)

    if grad_clip_hyper is not None and hyper_params:
        torch.nn.utils.clip_grad_norm_(hyper_params, grad_clip_hyper)

    # Optimizer step
    for opt in optimizers:
        opt.step()

    # Commit evolutionary changes
    algorithm.update_state()

    # Read the scalar once; it serves both the plateau metric and the return
    # value, avoiding a second tensor-to-host transfer.
    loss_value = float(loss.detach())

    # Scheduler step
    for sch in schedulers:
        if sch is None:
            continue
        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau):
            sch.step(loss_value)
        else:
            sch.step()

    return loss_value