heavyball 1.7.1__py3-none-any.whl → 2.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
heavyball/helpers.py ADDED
@@ -0,0 +1,804 @@
from __future__ import annotations

import functools
import math
import threading
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union

import numpy
import numpy as np
import optuna
import optunahub
import pandas as pd
import torch
from botorch.utils.sampling import manual_seed
from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import BaseDistribution, CategoricalDistribution, FloatDistribution, IntDistribution
from optuna.samplers import BaseSampler, CmaEsSampler, RandomSampler
from optuna.samplers._lazy_random_state import LazyRandomState
from optuna.study import Study
from optuna.study._study_direction import StudyDirection
from optuna.trial import FrozenTrial, TrialState
from optuna_integration.botorch import (
    ehvi_candidates_func,
    logei_candidates_func,
    qehvi_candidates_func,
    qei_candidates_func,
    qparego_candidates_func,
)
from torch import Tensor
from torch.nn import functional as F

from heavyball.utils import scalar_guard

_MAXINT32 = (1 << 31) - 1
_SAMPLER_KEY = "auto:sampler"

class SimpleAPIBaseSampler(BaseSampler):
    def __init__(
        self,
        search_space: dict[str, BaseDistribution] | None = None,
    ):
        self.search_space = search_space

    def suggest_all(self, trial: FrozenTrial):
        return {k: trial._suggest(k, dist) for k, dist in self.search_space.items()}

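# Editor's sketch (not part of the released file): with a fixed search space,
# `suggest_all` pulls every parameter in one call, so an objective reduces to a
# function of the resulting dict. The quadratic below is a hypothetical stand-in
# for a real objective.
def _example_suggest_all(sampler: SimpleAPIBaseSampler) -> float:
    study = optuna.create_study(direction="minimize", sampler=sampler)

    def objective(trial: optuna.Trial) -> float:
        params = sampler.suggest_all(trial)
        return sum(float(v) ** 2 for v in params.values())

    study.optimize(objective, n_trials=10)
    return study.best_value
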
def _get_default_candidates_func(
    n_objectives: int,
    has_constraint: bool,
    consider_running_trials: bool,
) -> Callable[
    [
        Tensor,
        Tensor,
        Tensor | None,
        Tensor,
        Tensor | None,
    ],
    Tensor,
]:
    """
    The original is available at https://github.com/optuna/optuna-integration/blob/156a8bc081322791015d2beefff9373ed7b24047/optuna_integration/botorch/botorch.py under the MIT License
    """
    if n_objectives > 3 and not has_constraint and not consider_running_trials:
        return ehvi_candidates_func
    elif n_objectives > 3:
        return qparego_candidates_func
    elif n_objectives > 1:
        return qehvi_candidates_func
    elif consider_running_trials:
        return qei_candidates_func
    else:
        return logei_candidates_func

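# For illustration (editor's note): with the defaults this module passes
# (has_constraint=False, consider_running_trials=False), the dispatch above picks
# logei_candidates_func for one objective, qehvi_candidates_func for two or three,
# and ehvi_candidates_func for four or more.
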
@functools.lru_cache(maxsize=None)
def bound_to_torch(bound: bytes, shape: tuple, device: str):
    bound = np.frombuffer(bound, dtype=np.float64).reshape(shape)
    bound = np.transpose(bound, (1, 0))
    return torch.from_numpy(bound).to(torch.device(device))


@functools.lru_cache(maxsize=None)
def nextafter(x: Union[float, int], y: Union[float, int]) -> Union[float, int]:
    return numpy.nextafter(x, y)

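# Editor's sketch (not part of the released file): `bound_to_torch` is keyed on the
# raw bytes of the bounds array because `bytes` is hashable while `numpy.ndarray`
# is not, so repeated calls with identical bounds skip the transpose and the
# host-to-device copy.
def _example_bound_cache() -> bool:
    bounds = np.array([[0.0, 1.0], [-1.0, 1.0]])  # (n_params, 2), as optuna lays it out
    a = bound_to_torch(bounds.tobytes(), bounds.shape, "cpu")
    b = bound_to_torch(bounds.tobytes(), bounds.shape, "cpu")
    return a is b  # True: the second call is served from the lru_cache
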
def _untransform_numerical_param_torch(
    trans_param: Union[float, int, Tensor],
    distribution: BaseDistribution,
    transform_log: bool,
) -> Tensor:
    d = distribution

    if isinstance(d, FloatDistribution):
        if d.log:
            param = trans_param.exp() if transform_log else trans_param
            if d.single():
                return param
            return param.clamp(max=nextafter(d.high, d.high - 1))

        if d.step is not None:
            scaled = ((trans_param - d.low) / d.step).round() * d.step + d.low
            return scaled.clamp(min=d.low, max=d.high)

        if d.single():
            return trans_param

        return trans_param.clamp(max=nextafter(d.high, d.high - 1))

    if not isinstance(d, IntDistribution):
        raise ValueError(f"Unexpected distribution type: {type(d)}")

    if d.log:
        param = trans_param.exp().round() if transform_log else trans_param
    else:
        param = ((trans_param - d.low) / d.step).round() * d.step + d.low
    param = param.clamp(min=d.low, max=d.high)
    return param.to(torch.int64)


@torch.no_grad()
def untransform(self: _SearchSpaceTransform, trans_params: Tensor) -> dict[str, Any]:
    assert trans_params.shape == (self._raw_bounds.shape[0],)

    if self._transform_0_1:
        trans_params = self._raw_bounds[:, 0] + trans_params * (self._raw_bounds[:, 1] - self._raw_bounds[:, 0])

    params = {}

    for (name, distribution), encoded_columns in zip(self._search_space.items(), self.column_to_encoded_columns):
        if isinstance(distribution, CategoricalDistribution):
            raise ValueError("We don't support categorical parameters.")
        else:
            param = _untransform_numerical_param_torch(trans_params[encoded_columns], distribution, self._transform_log)

        params[name] = param

    return {n: v.item() for n, v in params.items()}

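# Editor's sketch (not part of the released file): the torch `untransform` above
# mirrors optuna's `_SearchSpaceTransform.untransform` but stays in torch until the
# final `.item()` calls. Assuming optuna's private transform keeps its current
# layout, a point in the transformed space maps back like this:
def _example_untransform() -> dict[str, Any]:
    space = {"x": FloatDistribution(0.0, 10.0), "n": IntDistribution(1, 9)}
    trans = _SearchSpaceTransform(space)
    point = torch.tensor([5.0, 5.0], dtype=torch.float64)
    return untransform(trans, point)  # -> {"x": 5.0, "n": 5}
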
class BoTorchSampler(SimpleAPIBaseSampler):
    """
    A significantly more efficient implementation of Optuna's `BoTorchSampler` - it keeps more of the work on the GPU / in torch.
    The original is available at https://github.com/optuna/optuna-integration/blob/156a8bc081322791015d2beefff9373ed7b24047/optuna_integration/botorch/botorch.py under the MIT License
    The original API is kept for backward compatibility, but many arguments are ignored to improve maintainability.
    """

    def __init__(
        self,
        search_space: dict[str, BaseDistribution] | None = None,
        *,
        candidates_func: None = None,
        constraints_func: None = None,
        n_startup_trials: int = 10,
        consider_running_trials: bool = False,
        independent_sampler: None = None,
        seed: int | None = None,
        device: torch.device | str | None = None,
        trial_chunks: int = 128,
    ):
        assert constraints_func is None
        assert candidates_func is None
        assert consider_running_trials is False
        assert independent_sampler is None
        self._candidates_func = None
        self._independent_sampler = RandomSampler(seed=seed)
        self._n_startup_trials = n_startup_trials
        self._seed = seed
        self.trial_chunks = trial_chunks

        self._study_id: int | None = None
        self.search_space = search_space
        if isinstance(device, str):
            device = torch.device(device)
        self._device = device or torch.device("cpu")
        self.seen_trials = set()
        self._values = None
        self._params = None
        self._index = 0

    def infer_relative_search_space(self, study: Study, trial: FrozenTrial) -> dict[str, BaseDistribution]:
        return self.search_space

    @torch.no_grad()
    def _preprocess_trials(
        self, trans: _SearchSpaceTransform, study: Study, trials: list[FrozenTrial]
    ) -> Tuple[int, Tensor, Tensor]:
        # Only transform trials we haven't seen yet; everything else is already cached
        # in self._values / self._params on the target device.
        new_trials = []
        for trial in trials:
            tid: int = trial._trial_id
            if tid not in self.seen_trials:
                self.seen_trials.add(tid)
                new_trials.append(trial)
        trials = new_trials

        n_objectives = len(study.directions)
        if not new_trials:
            return n_objectives, self._values[: self._index], self._params[: self._index]

        n_completed_trials = len(trials)  # number of newly completed trials
        values: numpy.ndarray = numpy.empty((n_completed_trials, n_objectives), dtype=numpy.float64)
        params: numpy.ndarray = numpy.empty((n_completed_trials, trans.bounds.shape[0]), dtype=numpy.float64)
        for trial_idx, trial in enumerate(trials):
            if trial.state != TrialState.COMPLETE:
                raise ValueError(f"TrialState must be COMPLETE, but {trial.state} was found.")

            params[trial_idx] = trans.transform(trial.params)
            values[trial_idx, :] = np.array(trial.values)

        for obj_idx, direction in enumerate(study.directions):
            if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
                values[:, obj_idx] *= -1

        # Grow the cached tensors in multiples of trial_chunks so reallocation is rare.
        if self._values is None:
            self._values = torch.zeros((self.trial_chunks, n_objectives), dtype=torch.float64, device=self._device)
            self._params = torch.zeros(
                (self.trial_chunks, trans.bounds.shape[0]), dtype=torch.float64, device=self._device
            )
        spillage = (self._index + n_completed_trials) - self._values.size(0)
        if spillage > 0:
            pad = int(math.ceil(spillage / self.trial_chunks) * self.trial_chunks)
            self._values = F.pad(self._values, (0, 0, 0, pad))
            self._params = F.pad(self._params, (0, 0, 0, pad))
        self._values[self._index : self._index + n_completed_trials] = torch.from_numpy(values)
        self._params[self._index : self._index + n_completed_trials] = torch.from_numpy(params)
        self._index += n_completed_trials

        return n_objectives, self._values[: self._index], self._params[: self._index]

    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
    ) -> dict[str, Any]:
        assert isinstance(search_space, dict)

        if len(search_space) == 0:
            return {}

        completed_trials = study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))

        n_completed_trials = len(completed_trials)
        if n_completed_trials < self._n_startup_trials:
            return {}

        trans = _SearchSpaceTransform(search_space)
        n_objectives, values, params = self._preprocess_trials(trans, study, completed_trials)

        if self._candidates_func is None:
            self._candidates_func = _get_default_candidates_func(
                n_objectives=n_objectives, has_constraint=False, consider_running_trials=False
            )

        bounds = bound_to_torch(trans.bounds.tobytes(), trans.bounds.shape, str(self._device))

        with manual_seed(self._seed):
            candidates = self._candidates_func(params, values, None, bounds, None)
            if self._seed is not None:
                self._seed += 1

        if not isinstance(candidates, torch.Tensor):
            raise TypeError("Candidates must be a torch.Tensor.")
        if candidates.dim() == 2:
            if candidates.size(0) != 1:
                raise ValueError(
                    "Candidates batch optimization is not supported and the first dimension must "
                    "have size 1 if candidates is a two-dimensional tensor. Actual: "
                    f"{candidates.size()}."
                )
            candidates = candidates.squeeze(0)
        if candidates.dim() != 1:
            raise ValueError("Candidates must be one or two-dimensional.")
        if candidates.size(0) != bounds.size(1):
            raise ValueError(
                "Candidates size must match with the given bounds. Actual candidates: "
                f"{candidates.size(0)}, bounds: {bounds.size(1)}."
            )
        return untransform(trans, candidates)

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        return self._independent_sampler.sample_independent(study, trial, param_name, param_distribution)

    def reseed_rng(self) -> None:
        self._independent_sampler.reseed_rng()
        if self._seed is not None:
            self._seed = numpy.random.RandomState().randint(numpy.iinfo(numpy.int32).max)

    def before_trial(self, study: Study, trial: FrozenTrial) -> None:
        self._independent_sampler.before_trial(study, trial)

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Sequence[float] | None,
    ) -> None:
        self._independent_sampler.after_trial(study, trial, state, values)

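# Editor's sketch (not part of the released file): typical single-objective use.
# After `n_startup_trials` random trials, each new trial fits a GP on all completed
# trials (cached incrementally on `device` by `_preprocess_trials`) and optimizes a
# log-EI acquisition function to propose the next point.
def _example_botorch_sampler() -> None:
    space = {
        "lr": FloatDistribution(1e-5, 1e-1, log=True),
        "momentum": FloatDistribution(0.0, 0.99),
    }
    sampler = BoTorchSampler(search_space=space, seed=0, device="cpu")
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(lambda t: sum(float(v) ** 2 for v in sampler.suggest_all(t).values()), n_trials=15)
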
def _convert_to_hebo_design_space(search_space: dict[str, BaseDistribution]) -> DesignSpace:
    if not search_space:
        raise ValueError("Empty search space.")
    design_space = []
    for name, distribution in search_space.items():
        config: dict[str, Any] = {"name": name}
        if isinstance(distribution, (FloatDistribution, IntDistribution)):
            if not distribution.log and distribution.step is not None:
                config["type"] = "int"
                n_steps = int(np.round((distribution.high - distribution.low) / distribution.step + 1))
                config["lb"] = 0
                config["ub"] = n_steps - 1
            else:
                config["lb"] = distribution.low
                config["ub"] = distribution.high
                if distribution.log:
                    config["type"] = "pow_int" if isinstance(distribution, IntDistribution) else "pow"
                else:
                    assert not isinstance(distribution, IntDistribution)
                    config["type"] = "num"
        else:
            raise NotImplementedError(f"Unsupported distribution: {distribution}")

        design_space.append(config)
    return DesignSpace().parse(design_space)

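# For illustration (editor's note): stepped distributions are rescaled to an index
# grid so HEBO searches over step counts, e.g. IntDistribution(low=0, high=10, step=2)
# becomes {"type": "int", "lb": 0, "ub": 5}; `sample_relative` then maps the chosen
# index back via low + index * step.
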
class HEBOSampler(optunahub.samplers.SimpleBaseSampler, SimpleAPIBaseSampler):
    """
    Simplified version of https://github.com/optuna/optunahub-registry/blob/89da32cfc845c4275549000369282631c70bdaff/package/samplers/hebo/sampler.py
    modified under the MIT License
    """

    def __init__(
        self,
        search_space: dict[str, BaseDistribution],
        *,
        seed: int | None = None,
        constant_liar: bool = False,
        independent_sampler: BaseSampler | None = None,
    ) -> None:
        super().__init__(search_space, seed)
        assert constant_liar is False
        assert independent_sampler is None
        self._hebo = HEBO(_convert_to_hebo_design_space(search_space), scramble_seed=self._seed)
        self._independent_sampler = optuna.samplers.RandomSampler(seed=seed)
        self._rng = np.random.default_rng(seed)

    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
    ) -> dict[str, Any]:
        params = {}
        for name, row in self._hebo.suggest().items():
            if name not in search_space:
                continue

            dist = search_space[name]
            if isinstance(dist, (IntDistribution, FloatDistribution)) and not dist.log and dist.step is not None:
                step_index = row.iloc[0]
                params[name] = dist.low + step_index * dist.step
            else:
                params[name] = row.iloc[0]
        return params

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Sequence[float] | None,
    ) -> None:
        if self._hebo is None or values is None:
            return
        sign = 1 if study.direction == StudyDirection.MINIMIZE else -1
        values = np.array([values[0]])
        worst_value = np.nanmax(values) if study.direction == StudyDirection.MINIMIZE else np.nanmin(values)
        nan_padded_values = sign * np.where(np.isnan(values), worst_value, values)[:, np.newaxis]
        params = pd.DataFrame([trial.params])
        for name, dist in trial.distributions.items():
            if isinstance(dist, (IntDistribution, FloatDistribution)) and not dist.log and dist.step is not None:
                params[name] = (params[name] - dist.low) / dist.step

        self._hebo.observe(params, nan_padded_values)

    def infer_relative_search_space(self, study: Study, trial: FrozenTrial) -> dict[str, BaseDistribution]:
        return self.search_space

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        return self._independent_sampler.sample_independent(study, trial, param_name, param_distribution)

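# Editor's sketch (not part of the released file): HEBO keeps its own observation
# history, so results flow back through `after_trial` (optuna calls it for us once
# the sampler is attached to a study).
def _example_hebo_sampler() -> float:
    space = {"x": FloatDistribution(-5.0, 5.0)}
    sampler = HEBOSampler(search_space=space, seed=0)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(lambda t: sampler.suggest_all(t)["x"] ** 2, n_trials=20)
    return study.best_value
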
class FastINGO:
    """
    Adapted from https://github.com/optuna/optunahub-registry/blob/89da32cfc845c4275549000369282631c70bdaff/package/samplers/implicit_natural_gradient/sampler.py
    under the MIT License
    """

    def __init__(
        self,
        mean: np.ndarray,
        inv_sigma: np.ndarray,
        lower: np.ndarray,
        upper: np.ndarray,
        seed: Optional[int] = None,
        population_size: Optional[int] = None,
        learning_rate: Optional[float] = None,
        last_n: int = 4096,
        loco_step_size: float = 1,
        device="cuda",
        batchnorm_decay: float = 0.99,
        score_decay: float = 0.99,
    ) -> None:
        n_dimension = len(mean)
        if population_size is None:
            population_size = 4 + int(np.floor(3 * np.log(n_dimension)))
            population_size = 2 * (population_size // 2)

        self.last_n = last_n
        self.batchnorm_decay = batchnorm_decay
        self.score_decay = score_decay
        self._learning_rate = learning_rate or 1.0 / np.sqrt(n_dimension)
        self._mean = torch.from_numpy(mean).to(device)
        self._sigma = torch.from_numpy(inv_sigma).to(device)  # stores the inverse variance (precision)
        self._lower = torch.from_numpy(lower).to(device)
        self._upper = torch.from_numpy(upper).to(device)
        self.generator = torch.Generator(device=device)
        self.generator.manual_seed(0x123123 if seed is None else seed)
        self.loco_step_size = loco_step_size
        self._population_size = population_size
        self.device = device

        self._ys = None
        self._means = None
        self._z = None
        self._stds = None
        self._g = 0

    @torch.no_grad()
    def _concat(self, name, x):
        # Append x to the rolling buffer `name`, keeping at most the last_n entries.
        item = getattr(self, name, None)
        if isinstance(x, np.ndarray):
            x = torch.from_numpy(x).to(self.device)
        elif not isinstance(x, torch.Tensor):
            x = scalar_guard(x, self._mean).view(1)
        if item is not None:
            x = torch.cat((item, x), dim=0)[-self.last_n :]
        setattr(self, name, x)

    @property
    def dim(self) -> int:
        return self._mean.shape[0]

    @property
    def generation(self) -> int:
        return self._g

    @property
    def population_size(self) -> int:
        return self._population_size

    @torch.no_grad()
    def ask(self) -> np.ndarray:
        dimension = self._mean.shape[0]
        z = torch.randn(dimension, generator=self.generator, device=self.device, dtype=torch.float64)
        self._concat("_z", z[None])
        self._concat("_means", self._mean[None])
        self._concat("_stds", self._sigma[None])
        x = z / self._sigma.clamp(min=1e-8).sqrt() + self._mean  # z * std + mean, since _sigma is the precision
        return x.clamp(min=self._lower, max=self._upper).cpu().numpy()

    @torch.no_grad()
    def tell(self, y: float) -> None:
        self._g += 1
        self._concat("_ys", y)
        y = self._ys
        if y.numel() <= 2:
            return

        # Shift losses to be strictly positive, then work in log space.
        y = y + torch.where(y.min() <= 0, 1e-8 - y.min(), 0)
        y = y.log()

        # Exponentially weighted running mean/std over past losses; the newest
        # entries (at the end of the buffer) receive the largest weights.
        ema = -torch.arange(y.size(0), device=y.device, dtype=y.dtype)
        weight = self.batchnorm_decay**ema
        weight = weight / weight.sum().clamp(min=1e-8)
        y_mean = weight @ y
        y_mean_sq = weight @ y.square()
        y_std = (y_mean_sq - y_mean.square()).clamp(min=1e-8).sqrt()
        score = (y.view(-1, 1) - y_mean) / y_std

        z = self._z
        mean_orig = self._means
        sigma_orig = self._stds
        mean_grad = score * (z / sigma_orig.clamp(min=1e-8).sqrt())
        sigma_grad = -score * z.square() * sigma_orig
        target_mean = mean_orig - mean_grad * self.loco_step_size  # MSE(current, target)
        target_sigma = sigma_orig - sigma_grad * self.loco_step_size

        weight = self.score_decay**ema
        weight = weight / weight.sum().clamp(min=1e-8)
        self._mean, self._sigma = weight @ target_mean, weight @ target_sigma

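# Editor's sketch (not part of the released file): FastINGO is a plain ask/tell
# optimizer - `ask` draws one point from the current Gaussian, `tell` feeds back the
# loss. Minimizing a sphere function on CPU (the class defaults to device="cuda"):
def _example_fast_ingo(steps: int = 200) -> np.ndarray:
    dim = 4
    opt = FastINGO(
        mean=np.zeros(dim),
        inv_sigma=np.ones(dim),
        lower=np.full(dim, -5.0),
        upper=np.full(dim, 5.0),
        seed=0,
        device="cpu",
    )
    for _ in range(steps):
        x = opt.ask()
        opt.tell(float((x**2).sum()))
    return opt.ask()  # should land near the origin
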
class ImplicitNaturalGradientSampler(BaseSampler):
    """
    Adapted from https://github.com/optuna/optunahub-registry/blob/89da32cfc845c4275549000369282631c70bdaff/package/samplers/implicit_natural_gradient/sampler.py
    under the MIT License
    """

    def __init__(
        self,
        search_space: Dict[str, BaseDistribution],
        x0: Optional[Dict[str, Any]] = None,
        sigma0: Optional[float] = None,
        lr: Optional[float] = None,
        n_startup_trials: int = 1,
        independent_sampler: Optional[BaseSampler] = None,
        warn_independent_sampling: bool = True,
        seed: Optional[int] = None,
        population_size: Optional[int] = None,
    ) -> None:
        self.search_space = search_space
        self._x0 = x0
        self._sigma0 = sigma0
        self._lr = lr
        self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
        self._n_startup_trials = n_startup_trials
        self._warn_independent_sampling = warn_independent_sampling
        self._optimizer: Optional[FastINGO] = None
        self._seed = seed
        self._population_size = population_size

        self._param_queue: List[Dict[str, Any]] = []

    def _get_optimizer(self) -> FastINGO:
        assert self._optimizer is not None
        return self._optimizer

    def reseed_rng(self) -> None:
        self._independent_sampler.reseed_rng()
        if self._optimizer:
            # FastINGO keeps its randomness in a torch.Generator rather than a numpy RNG.
            self._optimizer.generator.seed()

    def infer_relative_search_space(
        self, study: "optuna.Study", trial: "optuna.trial.FrozenTrial"
    ) -> Dict[str, BaseDistribution]:
        search_space: Dict[str, BaseDistribution] = {}
        for name, distribution in self.search_space.items():
            if distribution.single():
                # The optimizer cannot handle distributions that contain just a single value,
                # so we skip them. Note that the parameter values for such distributions are
                # sampled in `Trial`.
                continue

            if not isinstance(
                distribution,
                (
                    optuna.distributions.FloatDistribution,
                    optuna.distributions.IntDistribution,
                ),
            ):
                # Categorical distributions are unsupported.
                continue
            search_space[name] = distribution

        return search_space

    def _check_trial_is_generation(self, trial: FrozenTrial) -> bool:
        current_gen = self._get_optimizer().generation
        trial_gen = trial.system_attrs.get("ingo", -1)
        return current_gen == trial_gen

    def sample_relative(
        self,
        study: "optuna.Study",
        trial: "optuna.trial.FrozenTrial",
        search_space: Dict[str, BaseDistribution],
    ) -> Dict[str, Any]:
        self._raise_error_if_multi_objective(study)

        if len(search_space) == 0:
            return {}

        completed_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
        if len(completed_trials) < self._n_startup_trials:
            return {}

        if len(search_space) == 1:
            self._warn_independent_sampling = False
            return {}

        trans = _SearchSpaceTransform(search_space)

        if self._optimizer is None:
            self._optimizer = self._init_optimizer(trans, population_size=self._population_size)

        if self._optimizer.dim != len(trans.bounds):
            self._warn_independent_sampling = False
            return {}

        solution_trials = [t for t in completed_trials if self._check_trial_is_generation(t)]
        for t in solution_trials:
            self._optimizer.tell(-t.value if study.direction == StudyDirection.MAXIMIZE else t.value)

        study._storage.set_trial_system_attr(trial._trial_id, "ingo", self._get_optimizer().generation)
        return trans.untransform(self._optimizer.ask())

    def _init_optimizer(
        self,
        trans: _SearchSpaceTransform,
        population_size: Optional[int] = None,
    ) -> FastINGO:
        lower_bounds = trans.bounds[:, 0]
        upper_bounds = trans.bounds[:, 1]
        n_dimension = len(trans.bounds)

        if self._x0 is None:
            mean = lower_bounds + (upper_bounds - lower_bounds) / 2
        else:
            mean = trans.transform(self._x0)

        if self._sigma0 is None:
            sigma0 = np.min((upper_bounds - lower_bounds) / 6)
        else:
            sigma0 = self._sigma0
        inv_sigma = 1 / sigma0 * np.ones(n_dimension)

        return FastINGO(
            mean=mean,
            inv_sigma=inv_sigma,
            lower=lower_bounds,
            upper=upper_bounds,
            seed=self._seed,
            population_size=population_size,
            learning_rate=self._lr,
        )

    def sample_independent(
        self,
        study: "optuna.Study",
        trial: "optuna.trial.FrozenTrial",
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        self._raise_error_if_multi_objective(study)

        return self._independent_sampler.sample_independent(study, trial, param_name, param_distribution)

    def after_trial(
        self,
        study: "optuna.Study",
        trial: "optuna.trial.FrozenTrial",
        state: TrialState,
        values: Optional[Sequence[float]],
    ) -> None:
        self._independent_sampler.after_trial(study, trial, state, values)

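# Editor's sketch (not part of the released file): single-objective use over a purely
# numeric search space. Note that `_init_optimizer` leaves FastINGO on its default
# device="cuda", so this sketch assumes a CUDA-capable machine.
def _example_ingo_sampler() -> None:
    space = {"x": FloatDistribution(-5.0, 5.0), "y": FloatDistribution(-5.0, 5.0)}
    sampler = ImplicitNaturalGradientSampler(search_space=space, seed=0)
    study = optuna.create_study(direction="minimize", sampler=sampler)

    def objective(trial: optuna.Trial) -> float:
        x = trial.suggest_float("x", -5.0, 5.0)
        y = trial.suggest_float("y", -5.0, 5.0)
        return x * x + y * y

    study.optimize(objective, n_trials=30)
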
class ThreadLocalSampler(threading.local):
    sampler: BaseSampler | None = None

def init_cmaes(study, seed, trials, search_space):
    trials.sort(key=lambda trial: trial.datetime_complete)
    return CmaEsSampler(seed=seed, source_trials=trials, lr_adapt=True)


def init_hebo(study, seed, trials, search_space):
    sampler = HEBOSampler(search_space=search_space, seed=seed)
    for trial in trials:
        sampler.after_trial(study, trial, TrialState.COMPLETE, trial.values)
    return sampler


def init_botorch(study, seed, trials, search_space):
    return BoTorchSampler(search_space=search_space, seed=seed, device="cuda")  # will automatically pull in the latest data


def init_nsgaii(study, seed, trials, search_space):
    module = optunahub.load_module(
        "samplers/nsgaii_with_initial_trials",
    )
    return module.NSGAIIwITSampler(seed=seed)


def init_ingo(study, seed, trials, search_space):
    return ImplicitNaturalGradientSampler(search_space=search_space, seed=seed)

class AutoSampler(BaseSampler):
    def __init__(
        self,
        samplers: Iterable[Tuple[int, Callable]] | None = None,
        search_space: dict[str, BaseDistribution] | None = None,
        *,
        seed: int | None = None,
        constraints_func: None = None,
    ) -> None:
        assert constraints_func is None
        if samplers is None:
            samplers = ((0, init_hebo), (100, init_nsgaii))
        # `samplers` is a sequence of (first_trial_index, factory) pairs; the factory whose
        # index was reached most recently provides the active sampler.
        self.sampler_indices = np.sort(np.array([x[0] for x in samplers], dtype=np.int32))
        self.samplers = [x[1] for x in sorted(samplers, key=lambda x: x[0])]
        self.search_space = search_space
        self._rng = LazyRandomState(seed)
        self._random_sampler = RandomSampler(seed=seed)
        self._thread_local_sampler = ThreadLocalSampler()
        self._constraints_func = constraints_func
        self._completed_trials = 0
        self._current_index = -1

    def __getstate__(self) -> dict[Any, Any]:
        state = self.__dict__.copy()
        del state["_thread_local_sampler"]
        return state

    def __setstate__(self, state: dict[Any, Any]) -> None:
        self.__dict__.update(state)
        self._thread_local_sampler = ThreadLocalSampler()

    @property
    def _sampler(self) -> BaseSampler:
        if self._thread_local_sampler.sampler is None:
            seed_for_random_sampler = self._rng.rng.randint(_MAXINT32)
            self._sampler = RandomSampler(seed=seed_for_random_sampler)

        return self._thread_local_sampler.sampler

    @_sampler.setter
    def _sampler(self, sampler: BaseSampler) -> None:
        self._thread_local_sampler.sampler = sampler

    def reseed_rng(self) -> None:
        self._rng.rng.seed()
        self._sampler.reseed_rng()

    def _update_sampler(self, study: Study):
        if len(study.directions) > 1:
            raise ValueError("Multi-objective optimization is not supported.")

        # Once CMA-ES has been instantiated from past trials, stick with it.
        if isinstance(self._sampler, CmaEsSampler):
            return

        complete_trials = study._get_trials(deepcopy=False, states=(TrialState.COMPLETE,), use_cache=True)
        self._completed_trials = max(self._completed_trials, len(complete_trials))
        new_index = (self._completed_trials >= self.sampler_indices).sum() - 1
        if new_index == self._current_index:
            return
        self._current_index = new_index
        self._sampler = self.samplers[new_index](
            study, self._rng.rng.randint(_MAXINT32), complete_trials, self.search_space
        )

    def infer_relative_search_space(self, study: Study, trial: FrozenTrial) -> dict[str, BaseDistribution]:
        return self._sampler.infer_relative_search_space(study, trial)

    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
    ) -> dict[str, Any]:
        return self._sampler.sample_relative(study, trial, self.search_space)

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        return self._random_sampler.sample_independent(study, trial, param_name, param_distribution)

    def before_trial(self, study: Study, trial: FrozenTrial) -> None:
        # NOTE(nabenabe): The sampler must be updated in this method. If it were updated in,
        # for example, infer_relative_search_space, the sampler used for before_trial and the
        # one used for sample_relative/after_trial might differ, meaning that the sampling
        # routine could be incompatible.
        if len(study._get_trials(deepcopy=False, states=(TrialState.COMPLETE,), use_cache=True)) != 0:
            self._update_sampler(study)

        sampler_name = self._sampler.__class__.__name__
        study._storage.set_trial_system_attr(trial._trial_id, _SAMPLER_KEY, sampler_name)
        self._sampler.before_trial(study, trial)

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Sequence[float] | None,
    ) -> None:
        assert state in [TrialState.COMPLETE, TrialState.FAIL, TrialState.PRUNED]
        self._sampler.after_trial(study, trial, state, values)
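

# Editor's sketch (not part of the released file): AutoSampler switches samplers on a
# completed-trial schedule. With the default ((0, init_hebo), (100, init_nsgaii)),
# HEBO drives the first 100 trials and NSGA-II takes over afterwards; any
# (first_trial_index, factory) pairs work, e.g. the init_* helpers above.
def _example_auto_sampler() -> float:
    space = {"x": FloatDistribution(-5.0, 5.0)}
    sampler = AutoSampler(search_space=space, seed=0)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(lambda t: t.suggest_float("x", -5.0, 5.0) ** 2, n_trials=120)
    return study.best_value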