opik-optimizer 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +10 -0
- opik_optimizer/base_optimizer.py +33 -0
- opik_optimizer/optimization_result.py +227 -6
- opik_optimizer/parameter_optimizer/__init__.py +11 -0
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +382 -0
- opik_optimizer/parameter_optimizer/parameter_search_space.py +125 -0
- opik_optimizer/parameter_optimizer/parameter_spec.py +214 -0
- opik_optimizer/parameter_optimizer/search_space_types.py +24 -0
- opik_optimizer/parameter_optimizer/sensitivity_analysis.py +71 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/METADATA +3 -1
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/RECORD +14 -8
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/top_level.txt +0 -0
opik_optimizer/__init__.py
CHANGED
@@ -18,6 +18,12 @@ from .meta_prompt_optimizer import MetaPromptOptimizer
 from .mipro_optimizer import MiproOptimizer
 from .optimization_config.configs import TaskConfig
 from .optimization_result import OptimizationResult
+from .parameter_optimizer import (
+    ParameterOptimizer,
+    ParameterSearchSpace,
+    ParameterSpec,
+    ParameterType,
+)
 
 __version__ = importlib.metadata.version("opik_optimizer")
 
@@ -34,9 +40,13 @@ __all__ = [
     "MetaPromptOptimizer",
     "MiproOptimizer",
     "EvolutionaryOptimizer",
+    "ParameterOptimizer",
     "OptimizationResult",
     "OptimizableAgent",
     "setup_logging",
     "datasets",
     "TaskConfig",
+    "ParameterSearchSpace",
+    "ParameterSpec",
+    "ParameterType",
 ]
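With these re-exports in place, the new parameter-tuning API is reachable from the package root. A quick smoke test of the 2.1.0 surface (assuming the wheel is installed):

```python
# Confirm the exports added in 2.1.0 resolve from the package root.
from opik_optimizer import (
    ParameterOptimizer,
    ParameterSearchSpace,
    ParameterSpec,
    ParameterType,
)

print(ParameterType.FLOAT.value)  # -> "float"
```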
opik_optimizer/base_optimizer.py
CHANGED
@@ -470,6 +470,39 @@ class BaseOptimizer(ABC):
             f"{self.__class__.__name__} does not implement optimize_mcp yet."
         )
 
+    def optimize_parameter(
+        self,
+        prompt: "chat_prompt.ChatPrompt",
+        dataset: Dataset,
+        metric: Callable,
+        parameter_space: Any,
+        experiment_config: dict | None = None,
+        n_trials: int | None = None,
+        n_samples: int | None = None,
+        agent_class: type[OptimizableAgent] | None = None,
+        **kwargs: Any,
+    ) -> optimization_result.OptimizationResult:
+        """
+        Optimize LLM call parameters such as temperature or top_k.
+
+        Args:
+            prompt: The chat prompt to evaluate with tuned parameters
+            dataset: Dataset providing evaluation examples
+            metric: Objective function to maximize
+            parameter_space: Definition of the search space for tunable parameters
+            experiment_config: Optional experiment metadata
+            n_trials: Number of trials to run (optimizer specific default if None)
+            n_samples: Number of dataset samples to evaluate per trial (None for all)
+            agent_class: Optional custom agent class to execute evaluations
+            **kwargs: Additional optimizer specific settings
+
+        Returns:
+            OptimizationResult: Structured result describing the best parameters found
+        """
+        raise NotImplementedError(
+            f"{self.__class__.__name__} does not implement optimize_parameter yet."
+        )
+
     def get_history(self) -> list[OptimizationRound]:
         """
         Get the optimization history.
opik_optimizer/optimization_result.py
CHANGED
@@ -8,6 +8,13 @@ import rich
 from .reporting_utils import get_console, get_link_text, get_optimization_run_url_by_id
 
 
+def _format_float(value: Any, digits: int = 6) -> str:
+    """Format float values with specified precision."""
+    if isinstance(value, float):
+        return f"{value:.{digits}f}"
+    return str(value)
+
+
 class OptimizationResult(pydantic.BaseModel):
     """Result oan optimization run."""
 
@@ -44,6 +51,50 @@ class OptimizationResult(pydantic.BaseModel):
     def model_dump(self, *kargs: Any, **kwargs: Any) -> dict[str, Any]:
         return super().model_dump(*kargs, **kwargs)
 
+    def get_optimized_model_kwargs(self) -> dict[str, Any]:
+        """
+        Extract optimized model_kwargs for use in other optimizers.
+
+        Returns:
+            Dictionary of optimized model kwargs, empty dict if not available
+        """
+        return self.details.get("optimized_model_kwargs", {})
+
+    def get_optimized_model(self) -> str | None:
+        """
+        Extract optimized model name.
+
+        Returns:
+            Model name string if available, None otherwise
+        """
+        return self.details.get("optimized_model")
+
+    def get_optimized_parameters(self) -> dict[str, Any]:
+        """
+        Extract optimized parameter values.
+
+        Returns:
+            Dictionary of optimized parameters, empty dict if not available
+        """
+        return self.details.get("optimized_parameters", {})
+
+    def apply_to_prompt(self, prompt: Any) -> Any:
+        """
+        Apply optimized parameters to a prompt.
+
+        Args:
+            prompt: ChatPrompt instance to apply optimizations to
+
+        Returns:
+            New ChatPrompt instance with optimized parameters applied
+        """
+        prompt_copy = prompt.copy()
+        if "optimized_model_kwargs" in self.details:
+            prompt_copy.model_kwargs = self.details["optimized_model_kwargs"]
+        if "optimized_model" in self.details:
+            prompt_copy.model = self.details["optimized_model"]
+        return prompt_copy
+
     def _calculate_improvement_str(self) -> str:
         """Helper to calculate improvement percentage string."""
         initial_s = self.initial_score
@@ -113,12 +164,97 @@ class OptimizationResult(pydantic.BaseModel):
             f"Final Best Score: {final_score_str}",
             f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
             f"Rounds Completed: {rounds_ran}",
-            "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
-            "--------------------------------------------------------------------------------",
-            f"{final_prompt_display}",
-            "--------------------------------------------------------------------------------",
-            f"{separator}",
         ]
+
+        optimized_params = self.details.get("optimized_parameters") or {}
+        parameter_importance = self.details.get("parameter_importance") or {}
+        search_ranges = self.details.get("search_ranges") or {}
+        precision = self.details.get("parameter_precision", 6)
+
+        if optimized_params:
+
+            def _format_range(desc: dict[str, Any]) -> str:
+                if "min" in desc and "max" in desc:
+                    step_str = (
+                        f", step={_format_float(desc['step'], precision)}"
+                        if desc.get("step") is not None
+                        else ""
+                    )
+                    return f"[{_format_float(desc['min'], precision)}, {_format_float(desc['max'], precision)}{step_str}]"
+                if desc.get("choices"):
+                    return f"choices={desc['choices']}"
+                return str(desc)
+
+            rows = []
+            stage_order = [
+                record.get("stage")
+                for record in self.details.get("search_stages", [])
+                if record.get("stage") in search_ranges
+            ]
+            if not stage_order:
+                stage_order = sorted(search_ranges)
+
+            for name in sorted(optimized_params):
+                contribution = parameter_importance.get(name)
+                stage_ranges = []
+                for stage in stage_order:
+                    params = search_ranges.get(stage) or {}
+                    if name in params:
+                        stage_ranges.append(f"{stage}: {_format_range(params[name])}")
+                if not stage_ranges:
+                    for stage, params in search_ranges.items():
+                        if name in params:
+                            stage_ranges.append(
+                                f"{stage}: {_format_range(params[name])}"
+                            )
+                joined_ranges = "\n".join(stage_ranges) if stage_ranges else "N/A"
+                rows.append(
+                    {
+                        "parameter": name,
+                        "value": optimized_params[name],
+                        "contribution": contribution,
+                        "ranges": joined_ranges,
+                    }
+                )
+
+            if rows:
+                output.append("Parameter Summary:")
+                # Compute overall improvement fraction for gain calculation
+                total_improvement = None
+                if isinstance(self.initial_score, (int, float)) and isinstance(
+                    self.score, (int, float)
+                ):
+                    if self.initial_score != 0:
+                        total_improvement = (self.score - self.initial_score) / abs(
+                            self.initial_score
+                        )
+                    else:
+                        total_improvement = self.score
+                for row in rows:
+                    value_str = _format_float(row["value"], precision)
+                    contrib_val = row["contribution"]
+                    if contrib_val is not None:
+                        contrib_percent = contrib_val * 100
+                        gain_str = ""
+                        if total_improvement is not None:
+                            gain_value = contrib_val * total_improvement * 100
+                            gain_str = f" ({gain_value:+.2f}%)"
+                        contrib_str = f"{contrib_percent:.1f}%{gain_str}"
+                    else:
+                        contrib_str = "N/A"
+                    output.append(
+                        f"- {row['parameter']}: value={value_str}, contribution={contrib_str}, ranges=\n {row['ranges']}"
+                    )
+
+        output.extend(
+            [
+                "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
+                "--------------------------------------------------------------------------------",
+                f"{final_prompt_display}",
+                "--------------------------------------------------------------------------------",
+                f"{separator}",
+            ]
+        )
         return "\n".join(output)
 
     def __rich__(self) -> rich.panel.Panel:
@@ -159,6 +295,11 @@ class OptimizationResult(pydantic.BaseModel):
             ),
         )
 
+        optimized_params = self.details.get("optimized_parameters") or {}
+        parameter_importance = self.details.get("parameter_importance") or {}
+        search_ranges = self.details.get("search_ranges") or {}
+        precision = self.details.get("parameter_precision", 6)
+
         # Display Chat Structure if available
         panel_title = "[bold]Final Optimized Prompt[/bold]"
         try:
@@ -190,7 +331,87 @@ class OptimizationResult(pydantic.BaseModel):
             prompt_renderable, title=panel_title, border_style="blue", padding=(1, 2)
         )
 
-
+        renderables: list[rich.console.RenderableType] = [table, "\n"]
+
+        if optimized_params:
+            summary_table = rich.table.Table(
+                title="Parameter Summary", show_header=True, title_style="bold"
+            )
+            summary_table.add_column("Parameter", justify="left", style="cyan")
+            summary_table.add_column("Value", justify="left")
+            summary_table.add_column("Importance", justify="left", style="magenta")
+            summary_table.add_column("Gain", justify="left", style="dim")
+            summary_table.add_column("Ranges", justify="left")
+
+            stage_order = [
+                record.get("stage")
+                for record in self.details.get("search_stages", [])
+                if record.get("stage") in search_ranges
+            ]
+            if not stage_order:
+                stage_order = sorted(search_ranges)
+
+            def _format_range(desc: dict[str, Any]) -> str:
+                if "min" in desc and "max" in desc:
+                    step_str = (
+                        f", step={_format_float(desc['step'], precision)}"
+                        if desc.get("step") is not None
+                        else ""
+                    )
+                    return f"[{_format_float(desc['min'], precision)}, {_format_float(desc['max'], precision)}{step_str}]"
+                if desc.get("choices"):
+                    return ",".join(map(str, desc["choices"]))
+                return str(desc)
+
+            total_improvement = None
+            if isinstance(self.initial_score, (int, float)) and isinstance(
+                self.score, (int, float)
+            ):
+                if self.initial_score != 0:
+                    total_improvement = (self.score - self.initial_score) / abs(
+                        self.initial_score
+                    )
+                else:
+                    total_improvement = self.score
+
+            for name in sorted(optimized_params):
+                value_str = _format_float(optimized_params[name], precision)
+                contrib_val = parameter_importance.get(name)
+                if contrib_val is not None:
+                    contrib_str = f"{contrib_val:.1%}"
+                    gain_str = (
+                        f"{contrib_val * total_improvement:+.2%}"
+                        if total_improvement is not None
+                        else "N/A"
+                    )
+                else:
+                    contrib_str = "N/A"
+                    gain_str = "N/A"
+                ranges_parts = []
+                for stage in stage_order:
+                    params = search_ranges.get(stage) or {}
+                    if name in params:
+                        ranges_parts.append(f"{stage}: {_format_range(params[name])}")
+                if not ranges_parts:
+                    for stage, params in search_ranges.items():
+                        if name in params:
+                            ranges_parts.append(
+                                f"{stage}: {_format_range(params[name])}"
+                            )
+
+                summary_table.add_row(
+                    name,
+                    value_str,
+                    contrib_str,
+                    gain_str,
+                    "\n".join(ranges_parts) if ranges_parts else "N/A",
+                )
+
+            renderables.extend([summary_table, "\n"])
+
+        renderables.append(prompt_panel)
+
+        content_group = rich.console.Group(*renderables)
 
         return rich.panel.Panel(
             content_group,
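The new accessors make it straightforward to chain optimizers: tune call parameters first, then feed the tuned settings into a prompt-level optimizer. A minimal sketch that builds a result by hand with the same fields ParameterOptimizer itself passes (all values are illustrative, and that field set being sufficient for construction is an assumption):

```python
from opik_optimizer import OptimizationResult

messages = [{"role": "system", "content": "You are a helpful assistant."}]

# Stand-in for what ParameterOptimizer.optimize_parameter() returns.
result = OptimizationResult(
    optimizer="ParameterOptimizer",
    prompt=messages,
    initial_prompt=messages,
    initial_score=0.71,
    score=0.82,
    metric_name="accuracy",
    details={
        "optimized_parameters": {"temperature": 0.3, "top_p": 0.9},
        "optimized_model_kwargs": {"temperature": 0.3, "top_p": 0.9},
        "optimized_model": "openai/gpt-4o-mini",
    },
    history=[],
    llm_calls=0,
    tool_calls=0,
)

print(result.get_optimized_parameters())  # {'temperature': 0.3, 'top_p': 0.9}
print(result.get_optimized_model())       # openai/gpt-4o-mini

# apply_to_prompt() returns a copy of a ChatPrompt with the tuned model and
# model_kwargs swapped in, leaving the original prompt untouched.
```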
opik_optimizer/parameter_optimizer/__init__.py
ADDED
from .parameter_optimizer import ParameterOptimizer
from .parameter_search_space import ParameterSearchSpace
from .parameter_spec import ParameterSpec
from .search_space_types import ParameterType

__all__ = [
    "ParameterOptimizer",
    "ParameterSearchSpace",
    "ParameterSpec",
    "ParameterType",
]
opik_optimizer/parameter_optimizer/parameter_optimizer.py
ADDED
"""Simple Optuna-based optimizer for model parameter tuning."""

from collections.abc import Callable, Mapping
from typing import Any

import copy
import logging
from datetime import datetime

import optuna
from optuna import importance as optuna_importance
from optuna.trial import Trial, TrialState

from opik import Dataset

from ..base_optimizer import BaseOptimizer
from ..optimizable_agent import OptimizableAgent
from ..optimization_config import chat_prompt
from ..optimization_result import OptimizationResult
from .parameter_search_space import ParameterSearchSpace
from .search_space_types import ParameterType
from .sensitivity_analysis import compute_sensitivity_from_trials

logger = logging.getLogger(__name__)


class ParameterOptimizer(BaseOptimizer):
    """Optimizer that tunes model call parameters (temperature, top_p, etc.)."""

    def __init__(
        self,
        model: str,
        *,
        default_n_trials: int = 20,
        n_threads: int = 4,
        seed: int = 42,
        verbose: int = 1,
        local_search_ratio: float = 0.3,
        local_search_scale: float = 0.2,
        **model_kwargs: Any,
    ) -> None:
        super().__init__(model=model, verbose=verbose, seed=seed, **model_kwargs)
        self.default_n_trials = default_n_trials
        self.n_threads = n_threads
        self.local_search_ratio = max(0.0, min(local_search_ratio, 1.0))
        self.local_search_scale = max(0.0, local_search_scale)

        if self.verbose == 0:
            logger.setLevel(logging.WARNING)
        elif self.verbose == 1:
            logger.setLevel(logging.INFO)
        else:
            logger.setLevel(logging.DEBUG)

    def optimize_prompt(
        self,
        prompt: chat_prompt.ChatPrompt,
        dataset: Dataset,
        metric: Callable[[Any, Any], float],
        experiment_config: dict | None = None,
        n_samples: int | None = None,
        auto_continue: bool = False,
        agent_class: type[OptimizableAgent] | None = None,
        **kwargs: Any,
    ) -> OptimizationResult:
        raise NotImplementedError(
            "ParameterOptimizer.optimize_prompt is not supported. "
            "Use optimize_parameter(prompt, dataset, metric, parameter_space) instead, "
            "where parameter_space is a ParameterSearchSpace or dict defining the parameters to optimize."
        )

    def optimize_parameter(
        self,
        prompt: chat_prompt.ChatPrompt,
        dataset: Dataset,
        metric: Callable[[Any, Any], float],
        parameter_space: ParameterSearchSpace | Mapping[str, Any],
        experiment_config: dict | None = None,
        n_trials: int | None = None,
        n_samples: int | None = None,
        agent_class: type[OptimizableAgent] | None = None,
        **kwargs: Any,
    ) -> OptimizationResult:
        if not isinstance(parameter_space, ParameterSearchSpace):
            parameter_space = ParameterSearchSpace.model_validate(parameter_space)

        # After validation, parameter_space is guaranteed to be ParameterSearchSpace
        assert isinstance(parameter_space, ParameterSearchSpace)  # for mypy

        sampler = kwargs.pop("sampler", None)
        callbacks = kwargs.pop("callbacks", None)
        timeout = kwargs.pop("timeout", None)
        local_trials_override = kwargs.pop("local_trials", None)
        local_search_scale_override = kwargs.pop("local_search_scale", None)
        if kwargs:
            extra_keys = ", ".join(sorted(kwargs.keys()))
            raise TypeError(f"Unsupported keyword arguments: {extra_keys}")

        self.validate_optimization_inputs(prompt, dataset, metric)
        self.configure_prompt_model(prompt)

        base_model_kwargs = copy.deepcopy(prompt.model_kwargs or {})
        base_prompt = prompt.copy()
        base_prompt.model_kwargs = copy.deepcopy(base_model_kwargs)

        metric_name = getattr(metric, "__name__", str(metric))

        self.agent_class = self.setup_agent_class(base_prompt, agent_class)
        baseline_score = self.evaluate_prompt(
            prompt=base_prompt,
            dataset=dataset,
            metric=metric,
            n_threads=self.n_threads,
            verbose=self.verbose,
            experiment_config=experiment_config,
            n_samples=n_samples,
            agent_class=self.agent_class,
        )

        history: list[dict[str, Any]] = [
            {
                "iteration": 0,
                "timestamp": datetime.utcnow().isoformat(),
                "parameters": {},
                "score": baseline_score,
                "model_kwargs": copy.deepcopy(base_prompt.model_kwargs or {}),
                "model": base_prompt.model,
                "type": "baseline",
                "stage": "baseline",
            }
        ]

        try:
            optuna.logging.disable_default_handler()
            optuna_logger = logging.getLogger("optuna")
            optuna_logger.setLevel(logger.getEffectiveLevel())
            optuna_logger.propagate = False
        except Exception as exc:  # pragma: no cover - defensive safety
            logger.warning("Could not configure Optuna logging: %s", exc)

        sampler = sampler or optuna.samplers.TPESampler(seed=self.seed)
        study = optuna.create_study(direction="maximize", sampler=sampler)

        total_trials = self.default_n_trials if n_trials is None else n_trials
        if total_trials < 0:
            total_trials = 0

        if local_trials_override is not None:
            local_trials = min(max(int(local_trials_override), 0), total_trials)
        else:
            local_trials = int(total_trials * self.local_search_ratio)

        global_trials = total_trials - local_trials
        if total_trials > 0 and global_trials <= 0:
            global_trials = 1
            local_trials = max(0, total_trials - global_trials)

        current_space = parameter_space
        current_stage = "global"
        stage_records: list[dict[str, Any]] = []
        search_ranges: dict[str, dict[str, Any]] = {}

        def objective(trial: Trial) -> float:
            sampled_values = current_space.suggest(trial)
            tuned_prompt = parameter_space.apply(
                prompt,
                sampled_values,
                base_model_kwargs=base_model_kwargs,
            )
            tuned_agent_class = self.setup_agent_class(tuned_prompt, agent_class)
            score = self.evaluate_prompt(
                prompt=tuned_prompt,
                dataset=dataset,
                metric=metric,
                n_threads=self.n_threads,
                verbose=self.verbose,
                experiment_config=experiment_config,
                n_samples=n_samples,
                agent_class=tuned_agent_class,
            )
            trial.set_user_attr("parameters", sampled_values)
            trial.set_user_attr(
                "model_kwargs", copy.deepcopy(tuned_prompt.model_kwargs)
            )
            trial.set_user_attr("model", tuned_prompt.model)
            trial.set_user_attr("stage", current_stage)
            return float(score)

        global_range = parameter_space.describe()
        stage_records.append(
            {
                "stage": "global",
                "trials": global_trials,
                "scale": 1.0,
                "parameters": global_range,
            }
        )
        search_ranges["global"] = global_range

        if global_trials > 0:
            study.optimize(
                objective,
                n_trials=global_trials,
                timeout=timeout,
                callbacks=callbacks,
                show_progress_bar=False,
            )

        for trial in study.trials:
            if trial.state != TrialState.COMPLETE or trial.value is None:
                continue
            timestamp = (
                trial.datetime_complete or trial.datetime_start or datetime.utcnow()
            )
            history.append(
                {
                    "iteration": trial.number + 1,
                    "timestamp": timestamp.isoformat(),
                    "parameters": trial.user_attrs.get("parameters", {}),
                    "score": float(trial.value),
                    "model_kwargs": trial.user_attrs.get("model_kwargs"),
                    "model": trial.user_attrs.get("model"),
                    "stage": trial.user_attrs.get("stage", "global"),
                }
            )

        best_score = baseline_score
        best_parameters: dict[str, Any] = {}
        best_model_kwargs = copy.deepcopy(base_prompt.model_kwargs or {})
        best_model = base_prompt.model

        completed_trials = [
            trial
            for trial in study.trials
            if trial.state == TrialState.COMPLETE and trial.value is not None
        ]
        if completed_trials:
            best_trial = max(completed_trials, key=lambda t: t.value)  # type: ignore[arg-type]
            if best_trial.value is not None and best_trial.value > best_score:
                best_score = float(best_trial.value)
                best_parameters = best_trial.user_attrs.get("parameters", {})
                best_model_kwargs = best_trial.user_attrs.get("model_kwargs", {})
                best_model = best_trial.user_attrs.get("model", prompt.model)

        local_space: ParameterSearchSpace | None = None
        if (
            local_trials > 0
            and completed_trials
            and any(
                spec.distribution in {ParameterType.FLOAT, ParameterType.INT}
                for spec in parameter_space.parameters
            )
        ):
            local_scale = (
                self.local_search_scale
                if local_search_scale_override is None
                else max(0.0, float(local_search_scale_override))
            )

            if best_parameters:
                center_values = best_parameters
            elif base_model_kwargs:
                center_values = base_model_kwargs
            else:
                center_values = {}

            if local_scale > 0 and center_values:
                current_stage = "local"
                local_space = parameter_space.narrow_around(center_values, local_scale)
                local_range = local_space.describe()
                stage_records.append(
                    {
                        "stage": "local",
                        "trials": local_trials,
                        "scale": local_scale,
                        "parameters": local_range,
                    }
                )
                search_ranges["local"] = local_range

                current_space = local_space
                study.optimize(
                    objective,
                    n_trials=local_trials,
                    timeout=timeout,
                    callbacks=callbacks,
                    show_progress_bar=False,
                )

                completed_trials = [
                    trial
                    for trial in study.trials
                    if trial.state == TrialState.COMPLETE and trial.value is not None
                ]
                if completed_trials:
                    new_best = max(completed_trials, key=lambda t: t.value)  # type: ignore[arg-type]
                    if new_best.value is not None and new_best.value > best_score:
                        best_score = float(new_best.value)
                        best_parameters = new_best.user_attrs.get("parameters", {})
                        best_model_kwargs = new_best.user_attrs.get("model_kwargs", {})
                        best_model = new_best.user_attrs.get("model", prompt.model)

            else:
                local_trials = 0

        for trial in study.trials:
            if trial.state != TrialState.COMPLETE or trial.value is None:
                continue
            timestamp = (
                trial.datetime_complete or trial.datetime_start or datetime.utcnow()
            )
            if not any(entry["iteration"] == trial.number + 1 for entry in history):
                history.append(
                    {
                        "iteration": trial.number + 1,
                        "timestamp": timestamp.isoformat(),
                        "parameters": trial.user_attrs.get("parameters", {}),
                        "score": float(trial.value),
                        "model_kwargs": trial.user_attrs.get("model_kwargs"),
                        "model": trial.user_attrs.get("model"),
                        "stage": trial.user_attrs.get("stage", current_stage),
                    }
                )

        rounds_summary = [
            {
                "iteration": trial.number + 1,
                "parameters": trial.user_attrs.get("parameters", {}),
                "score": float(trial.value) if trial.value is not None else None,
                "model": trial.user_attrs.get("model"),
                "stage": trial.user_attrs.get("stage"),
            }
            for trial in completed_trials
        ]

        try:
            importance = optuna_importance.get_param_importances(study)
        except (ValueError, RuntimeError, ImportError):
            # Falls back to custom sensitivity analysis if:
            # - Study has insufficient data (ValueError/RuntimeError)
            # - scikit-learn not installed (ImportError)
            importance = {}

        if not importance or all(value == 0 for value in importance.values()):
            importance = compute_sensitivity_from_trials(
                completed_trials, parameter_space.parameters
            )

        details = {
            "initial_score": baseline_score,
            "optimized_parameters": best_parameters,
            "optimized_model_kwargs": best_model_kwargs,
            "optimized_model": best_model,
            "trials": history,
            "parameter_space": parameter_space.model_dump(by_alias=True),
            "n_trials": total_trials,
            "model": best_model,
            "rounds": rounds_summary,
            "baseline_parameters": base_model_kwargs,
            "temperature": best_model_kwargs.get("temperature"),
            "local_trials": local_trials,
            "global_trials": global_trials,
            "search_stages": stage_records,
            "search_ranges": search_ranges,
            "parameter_importance": importance,
            "parameter_precision": 6,
        }

        return OptimizationResult(
            optimizer=self.__class__.__name__,
            prompt=prompt.get_messages() if hasattr(prompt, "get_messages") else [],
            initial_prompt=prompt.get_messages()
            if hasattr(prompt, "get_messages")
            else [],
            initial_score=baseline_score,
            score=best_score,
            metric_name=metric_name,
            details=details,
            history=history,
            llm_calls=self.llm_call_counter,
            tool_calls=self.tool_call_counter,
        )
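The overall flow: score the baseline prompt, run a global TPE stage over the full ranges, then optionally re-run a local stage on a space narrowed around the best point, and finally attach importance scores. A usage sketch; the top-level ChatPrompt export, the dataset name, and the `(dataset_item, llm_output)` metric shape are assumptions, not part of this diff:

```python
import opik
from opik_optimizer import ChatPrompt, ParameterOptimizer

client = opik.Opik()
dataset = client.get_or_create_dataset("demo-qa")  # hypothetical dataset

def accuracy(dataset_item, llm_output):
    # Toy metric: containment of the reference answer in the model output.
    return float(dataset_item["answer"].lower() in llm_output.lower())

prompt = ChatPrompt(
    messages=[
        {"role": "system", "content": "Answer concisely."},
        {"role": "user", "content": "{question}"},
    ]
)

optimizer = ParameterOptimizer(
    model="openai/gpt-4o-mini",
    default_n_trials=20,  # split ~70/30 across global and local stages by default
    n_threads=4,
    seed=42,
)

# A plain dict is accepted and normalized into a ParameterSearchSpace.
result = optimizer.optimize_parameter(
    prompt=prompt,
    dataset=dataset,
    metric=accuracy,
    parameter_space={
        "temperature": {"type": "float", "min": 0.0, "max": 1.5},
        "top_p": {"type": "float", "min": 0.5, "max": 1.0},
    },
    n_samples=50,
)
print(result.get_optimized_parameters())
```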
opik_optimizer/parameter_optimizer/parameter_search_space.py
ADDED
"""Parameter search space for collections of tunable parameters."""

from __future__ import annotations

import copy
from typing import Any
from collections.abc import Mapping, Sequence

from optuna.trial import Trial
from pydantic import BaseModel, Field, model_validator

from .parameter_spec import ParameterSpec


class ParameterSearchSpace(BaseModel):
    """Collection of parameters to explore during optimization."""

    parameters: list[ParameterSpec] = Field(default_factory=list)

    model_config = {
        "extra": "forbid",
    }

    @model_validator(mode="before")
    @classmethod
    def _normalize(cls, data: Any) -> Any:
        if isinstance(data, ParameterSearchSpace):
            return data
        if isinstance(data, Mapping):
            if "parameters" in data:
                return data
            parameters = []
            for name, spec in data.items():
                if isinstance(spec, Mapping):
                    spec_dict = dict(spec)
                elif isinstance(spec, ParameterSpec):
                    spec_dict = spec.model_dump()
                else:
                    raise TypeError(
                        "Parameter definitions must be mappings or ParameterSpec instances"
                    )
                spec_dict.setdefault("name", name)
                parameters.append(spec_dict)
            return {"parameters": parameters}
        if isinstance(data, Sequence):
            return {"parameters": list(data)}
        return data

    @model_validator(mode="after")
    def _validate(self) -> ParameterSearchSpace:
        names = [spec.name for spec in self.parameters]
        if len(names) != len(set(names)):
            duplicates = {name for name in names if names.count(name) > 1}
            raise ValueError(
                f"Duplicate parameter names detected: {', '.join(sorted(duplicates))}"
            )
        if not self.parameters:
            raise ValueError("Parameter search space cannot be empty")
        return self

    def suggest(self, trial: Trial) -> dict[str, Any]:
        """Sample a set of parameter values using an Optuna trial."""
        return {spec.name: spec.suggest(trial) for spec in self.parameters}

    def apply(
        self,
        prompt: Any,  # ChatPrompt type
        values: Mapping[str, Any],
        *,
        base_model_kwargs: dict[str, Any] | None = None,
    ) -> Any:  # Returns ChatPrompt
        """Return a prompt copy with sampled values applied."""
        prompt_copy = prompt.copy()
        if base_model_kwargs is not None:
            prompt_copy.model_kwargs = copy.deepcopy(base_model_kwargs)
        for spec in self.parameters:
            if spec.name in values:
                spec.apply_to_prompt(prompt_copy, values[spec.name])
        return prompt_copy

    def values_to_model_kwargs(
        self,
        values: Mapping[str, Any],
        *,
        base: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Produce a model_kwargs dictionary with sampled values applied."""
        model_kwargs = copy.deepcopy(base) if base is not None else {}
        for spec in self.parameters:
            if spec.name in values:
                spec.apply_to_model_kwargs(model_kwargs, values[spec.name])
        return model_kwargs

    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Ensure dumping keeps parameter definitions accessible."""
        return super().model_dump(*args, **kwargs)

    def narrow_around(
        self, values: Mapping[str, Any], scale: float
    ) -> ParameterSearchSpace:
        """Return a new search space narrowed around provided parameter values."""

        narrowed: list[ParameterSpec] = []
        for spec in self.parameters:
            value = values.get(spec.name)
            narrowed.append(spec.narrow(value, scale))
        return ParameterSearchSpace(parameters=narrowed)

    def describe(self) -> dict[str, dict[str, Any]]:
        """Return a human-friendly description of each parameter range."""

        summary: dict[str, dict[str, Any]] = {}
        for spec in self.parameters:
            entry: dict[str, Any] = {"type": spec.distribution.value}
            if spec.distribution.value in {"float", "int"}:
                entry["min"] = spec.low
                entry["max"] = spec.high
                if spec.step is not None:
                    entry["step"] = spec.step
                entry["scale"] = spec.scale
            else:
                if spec.choices is not None:
                    entry["choices"] = list(spec.choices)
            summary[spec.name] = entry
        return summary
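The "before" validator accepts a ready-made instance, a list of specs, or a plain mapping of name to spec, so callers can stay with dicts. A short sketch of that normalization and of the narrowing arithmetic (parameter names are illustrative; the bounds math follows ParameterSpec.narrow below):

```python
from opik_optimizer import ParameterSearchSpace

space = ParameterSearchSpace.model_validate(
    {
        "temperature": {"type": "float", "min": 0.0, "max": 2.0},
        "top_k": {"type": "int", "min": 1, "max": 100},
    }
)
print(space.describe()["temperature"])
# {'type': 'float', 'min': 0.0, 'max': 2.0, 'scale': 'linear'}

# Each numeric range keeps a window of width scale * span, centered on the
# given value and clamped to the original bounds:
# span 2.0, scale 0.2 -> half-window 0.2, so 0.3 narrows to [0.1, 0.5].
local = space.narrow_around({"temperature": 0.3, "top_k": 10}, scale=0.2)
print(local.describe()["temperature"])  # min 0.1, max 0.5
```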
opik_optimizer/parameter_optimizer/parameter_spec.py
ADDED
"""Parameter specification for defining tunable parameters."""

from __future__ import annotations

import math
from typing import Any, Literal
from collections.abc import Mapping, Sequence

from optuna.trial import Trial
from pydantic import BaseModel, Field, PrivateAttr, model_validator

from .search_space_types import ParameterType, ResolvedTarget


class ParameterSpec(BaseModel):
    """Definition for a single tunable parameter."""

    name: str
    description: str | None = None
    distribution: ParameterType = Field(alias="type")
    low: float | None = Field(default=None, alias="min")
    high: float | None = Field(default=None, alias="max")
    step: float | None = None
    scale: Literal["linear", "log"] = "linear"
    choices: list[Any] | None = None
    target: str | Sequence[str] | None = None
    default: Any | None = None

    model_config = {
        "populate_by_name": True,
        "extra": "forbid",
    }

    _resolved_target: ResolvedTarget | None = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
    def _coerce_aliases(cls, data: Any) -> Any:
        if isinstance(data, Mapping):
            data = dict(data)
            if "values" in data and "choices" not in data:
                data["choices"] = data.pop("values")
            if "selection" in data and "choices" not in data:
                data["choices"] = data.pop("selection")
            if "path" in data and "target" not in data:
                data["target"] = data.pop("path")
        return data

    @model_validator(mode="after")
    def _validate(self) -> ParameterSpec:
        if self.distribution in {ParameterType.FLOAT, ParameterType.INT}:
            if self.low is None or self.high is None:
                raise ValueError(
                    "'min' and 'max' must be provided for range parameters"
                )
            if self.low >= self.high:
                raise ValueError("'min' must be less than 'max'")
            if self.scale not in {"linear", "log"}:
                raise ValueError("scale must be 'linear' or 'log'")
            if self.scale == "log" and (self.low <= 0 or self.high <= 0):
                raise ValueError("log-scaled parameters require positive bounds")
            if self.step is not None and self.step <= 0:
                raise ValueError("step must be positive when provided")

            if self.distribution == ParameterType.INT:
                object.__setattr__(self, "low", int(self.low))
                object.__setattr__(self, "high", int(self.high))
                if self.step is not None:
                    object.__setattr__(self, "step", int(self.step))
        elif self.distribution == ParameterType.CATEGORICAL:
            if not self.choices:
                raise ValueError("categorical parameters require non-empty 'choices'")
        elif self.distribution == ParameterType.BOOL:
            if not self.choices:
                object.__setattr__(self, "choices", [False, True])
        else:  # pragma: no cover - safety fallback
            raise ValueError(f"Unsupported distribution: {self.distribution}")

        object.__setattr__(self, "_resolved_target", self._resolve_target())
        return self

    @property
    def target_path(self) -> ResolvedTarget:
        if self._resolved_target is None:
            self._resolved_target = self._resolve_target()
        return self._resolved_target

    def suggest(self, trial: Trial) -> Any:
        """Return a sampled value for this parameter from Optuna."""
        if self.distribution == ParameterType.FLOAT:
            assert self.low is not None and self.high is not None  # validated earlier
            return trial.suggest_float(
                self.name,
                float(self.low),
                float(self.high),
                step=self.step,
                log=self.scale == "log",
            )
        if self.distribution == ParameterType.INT:
            assert self.low is not None and self.high is not None  # validated earlier
            return trial.suggest_int(
                self.name,
                int(self.low),
                int(self.high),
                step=int(self.step) if self.step is not None else 1,
                log=self.scale == "log",
            )
        if self.distribution in {ParameterType.CATEGORICAL, ParameterType.BOOL}:
            assert self.choices is not None  # guarded in validators
            return trial.suggest_categorical(self.name, list(self.choices))
        raise RuntimeError(f"Unsupported distribution type: {self.distribution}")

    def apply_to_prompt(
        self,
        prompt: Any,  # ChatPrompt type
        value: Any,
    ) -> None:
        """Apply a sampled value to the provided prompt instance."""
        resolved = self.target_path
        if resolved.root == "model":
            if resolved.path:
                raise ValueError("Nested paths under 'model' are not supported")
            prompt.model = value
            return

        if prompt.model_kwargs is None:
            prompt.model_kwargs = {}

        self._assign_nested(prompt.model_kwargs, resolved.path, value)

    def apply_to_model_kwargs(self, model_kwargs: dict[str, Any], value: Any) -> None:
        """Apply a sampled value to a model_kwargs dictionary."""
        resolved = self.target_path
        if resolved.root != "model_kwargs":
            return
        self._assign_nested(model_kwargs, resolved.path, value)

    def narrow(self, center: Any, scale: float) -> ParameterSpec:
        """Return a narrowed version of the spec around the provided center."""

        if center is None or scale <= 0:
            return self

        if self.distribution in {ParameterType.FLOAT, ParameterType.INT}:
            if self.low is None or self.high is None:
                return self

            span = float(self.high) - float(self.low)
            if span <= 0:
                return self

            half_window = span * float(scale) / 2
            if half_window <= 0:
                return self

            center_val = float(center)
            new_low = max(float(self.low), center_val - half_window)
            new_high = min(float(self.high), center_val + half_window)

            if self.distribution == ParameterType.INT:
                new_low = math.floor(new_low)
                new_high = math.ceil(new_high)
                if new_low == new_high:
                    new_high = min(int(self.high), new_low + 1)
                if new_low == new_high:
                    return self

            if new_low >= new_high:
                return self

            spec_dict = self.model_dump(by_alias=True)
            spec_dict["min"] = new_low
            spec_dict["max"] = new_high
            return ParameterSpec.model_validate(spec_dict)

        # Non-numeric parameters remain unchanged
        return self

    def _assign_nested(
        self, container: dict[str, Any], path: Sequence[str], value: Any
    ) -> None:
        if not path:
            container[self.name] = value
            return
        current = container
        for key in path[:-1]:
            next_val = current.get(key)
            if not isinstance(next_val, dict):
                next_val = {}
                current[key] = next_val
            current = next_val
        current[path[-1]] = value

    def _resolve_target(self) -> ResolvedTarget:
        target = self.target
        if target is None:
            return ResolvedTarget("model_kwargs", (self.name,))

        if isinstance(target, str):
            tokens = tuple(filter(None, (part.strip() for part in target.split("."))))
        else:
            tokens = tuple(target)

        if not tokens:
            return ResolvedTarget("model_kwargs", (self.name,))

        root = tokens[0]
        path = tokens[1:]

        if root not in {"model", "model_kwargs"}:
            root = "model_kwargs"
            path = tokens

        return ResolvedTarget(root, tuple(path))  # type: ignore[arg-type]
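A spec with no target writes to `model_kwargs[name]`; a dotted target creates nested keys, and the special root `model` swaps the model name on the prompt itself. A sketch (the `json_mode` parameter and its target path are illustrative):

```python
from opik_optimizer import ParameterSpec

temp = ParameterSpec.model_validate(
    {"name": "temperature", "type": "float", "min": 0.0, "max": 1.0}
)
kwargs: dict = {}
temp.apply_to_model_kwargs(kwargs, 0.4)
print(kwargs)  # {'temperature': 0.4}

fmt = ParameterSpec.model_validate(
    {
        "name": "json_mode",
        "type": "categorical",
        "choices": ["json_object", "text"],
        "target": "model_kwargs.response_format.type",
    }
)
fmt.apply_to_model_kwargs(kwargs, "json_object")
print(kwargs)  # {'temperature': 0.4, 'response_format': {'type': 'json_object'}}
```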
opik_optimizer/parameter_optimizer/search_space_types.py
ADDED
"""Type definitions for parameter search space."""

from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from typing import Literal


class ParameterType(str, Enum):
    """Supported parameter distribution types."""

    FLOAT = "float"
    INT = "int"
    CATEGORICAL = "categorical"
    BOOL = "bool"


@dataclass(frozen=True)
class ResolvedTarget:
    """Resolved target location for a parameter."""

    root: Literal["model", "model_kwargs"]
    path: tuple[str, ...]
opik_optimizer/parameter_optimizer/sensitivity_analysis.py
ADDED
"""Sensitivity analysis utilities for parameter optimization."""

from __future__ import annotations

import math
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from optuna.trial import Trial

    from .search_space import ParameterSpec


def compute_sensitivity_from_trials(
    trials: list[Trial], specs: list[ParameterSpec]
) -> dict[str, float]:
    """
    Compute parameter sensitivity from completed trials.

    This function calculates a correlation-based sensitivity measure for each parameter
    by analyzing how changes in parameter values correlate with changes in the objective
    function values across trials.

    Args:
        trials: List of completed Optuna trials
        specs: List of parameter specifications

    Returns:
        Dictionary mapping parameter names to sensitivity scores (0.0 to 1.0)
    """
    sensitivities: dict[str, float] = {}

    for spec in specs:
        param_name = spec.name
        values: list[float] = []
        scores: list[float] = []

        for trial in trials:
            if trial.value is None:
                continue

            raw_value = trial.params.get(param_name)
            if isinstance(raw_value, bool):
                processed = float(int(raw_value))
            elif isinstance(raw_value, (int, float)):
                processed = float(raw_value)
            else:
                continue

            values.append(processed)
            scores.append(float(trial.value))

        if len(values) < 2 or len(set(values)) == 1:
            sensitivities[param_name] = 0.0
            continue

        mean_val = sum(values) / len(values)
        mean_score = sum(scores) / len(scores)

        cov = sum((v - mean_val) * (s - mean_score) for v, s in zip(values, scores))
        var_val = sum((v - mean_val) ** 2 for v in values)
        var_score = sum((s - mean_score) ** 2 for s in scores)

        if var_val <= 0 or var_score <= 0:
            sensitivities[param_name] = 0.0
            continue

        corr = abs(cov) / math.sqrt(var_val * var_score)
        sensitivities[param_name] = min(max(corr, 0.0), 1.0)

    return sensitivities
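The fallback measure is the absolute Pearson correlation between a parameter's sampled values and the trial scores, clamped to [0, 1]. A self-contained check with synthetic trials chosen so the correlation is exactly 1.0 (this sketch assumes Optuna's public `create_trial` and `FloatDistribution` helpers):

```python
import optuna
from opik_optimizer import ParameterSpec
from opik_optimizer.parameter_optimizer.sensitivity_analysis import (
    compute_sensitivity_from_trials,
)

spec = ParameterSpec.model_validate(
    {"name": "temperature", "type": "float", "min": 0.0, "max": 1.0}
)
dists = {"temperature": optuna.distributions.FloatDistribution(0.0, 1.0)}

# Scores rise linearly with temperature, so |Pearson r| = 1.0.
trials = [
    optuna.trial.create_trial(params={"temperature": t}, distributions=dists, value=s)
    for t, s in [(0.1, 0.50), (0.5, 0.70), (0.9, 0.90)]
]
print(compute_sensitivity_from_trials(trials, [spec]))  # {'temperature': 1.0}
```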
{opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opik_optimizer
-Version: 2.0.1
+Version: 2.1.0
 Summary: Agent optimization with Opik
 Home-page: https://github.com/comet-ml/opik
 Author: Comet ML
@@ -32,6 +32,7 @@ Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: langgraph; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: scikit-learn; extra == "dev"
 Dynamic: author
 Dynamic: home-page
 Dynamic: license-file
@@ -51,6 +52,7 @@ The Opik Agent Optimizer refines your prompts to achieve better performance from
 * **MetaPromptOptimizer** - Employs meta-prompting techniques for optimization
 * **MiproOptimizer** - Implements MIPRO (Multi-Input Prompt Optimization) algorithm
 * **GepaOptimizer** - Leverages GEPA (Genetic-Pareto) optimization approach
+* **ParameterOptimizer** - Optimizes LLM call parameters (temperature, top_p, etc.) using Bayesian optimization
 
 ## 🎯 Key Features
 
{opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-opik_optimizer/__init__.py,sha256=
+opik_optimizer/__init__.py,sha256=Vhvp9GwFPPxIGLa6dV12GAdN9WFt3a4rK1FjRsh5kn4,1455
 opik_optimizer/_throttle.py,sha256=1JXIhYlo0IaqCgwmNB0Hnh9CYhYPkwRFdVGIcE7pVNg,1362
-opik_optimizer/base_optimizer.py,sha256=
+opik_optimizer/base_optimizer.py,sha256=TKQknIvhJ1H5LOxhhkXIzjEepx3h0j0jyNsTGZ7EFLI,21410
 opik_optimizer/cache_config.py,sha256=Xd3NdUsL7bLQWoNe3pESqH4nHucU1iNTSGp-RqbwDog,599
 opik_optimizer/logging_config.py,sha256=TmxX0C1P20amxoXuiNQvlENOjdSNfWwvL8jFy206VWM,3837
 opik_optimizer/optimizable_agent.py,sha256=R0_BdwdHyZGWTw3oSvTg8FULDOYM8XaTiPNR3qV8DkQ,6344
-opik_optimizer/optimization_result.py,sha256=
+opik_optimizer/optimization_result.py,sha256=sG-Yr-hOaH9zx_I5S6_W3v6j8nPUhwYdS333jVM4Gus,17218
 opik_optimizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 opik_optimizer/reporting_utils.py,sha256=dcECFmzZ_J-DKoukMDEE_fm7X8sdQyl_ijTddvQtepE,8287
 opik_optimizer/task_evaluator.py,sha256=1hILYwJLtn7XpPX96JjubnlMasmudVTHMVK3pmd22bE,4312
@@ -62,13 +62,19 @@ opik_optimizer/optimization_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
 opik_optimizer/optimization_config/chat_prompt.py,sha256=d3jwM1UvUeRQOSsYHa5GD842VO3JWjVDmB3ROUGp57c,7089
 opik_optimizer/optimization_config/configs.py,sha256=EGacRNnl6TeWuf8RNsxpP6Nh5JhogjK-JxKllK8dQr0,413
 opik_optimizer/optimization_config/mappers.py,sha256=4uBoPaIvCo4bqt_w-4rJyVe2LMAP_W7p6xxnDmGT-Sk,1724
+opik_optimizer/parameter_optimizer/__init__.py,sha256=Eg-LEFBJqnOFw7i2B_YH27CoIGDPb5y_q1ar-ZpjtYo,308
+opik_optimizer/parameter_optimizer/parameter_optimizer.py,sha256=eDd9tFQinz2lKsEJtikCBVzSWMK4saI9bhUY2NtDEg0,14955
+opik_optimizer/parameter_optimizer/parameter_search_space.py,sha256=rgTNK8HPbdDiVm4GVX2QESTmQPhPFj4UkxqZfAy9JAA,4659
+opik_optimizer/parameter_optimizer/parameter_spec.py,sha256=HzYT_dHBTfZtx403mY-Epv_IEqn4kYuYBZ6QUdkFRiY,8064
+opik_optimizer/parameter_optimizer/search_space_types.py,sha256=UajTA2QKikEWazokDNO7j141gc2WxxYYiDRnFFjXi6M,512
+opik_optimizer/parameter_optimizer/sensitivity_analysis.py,sha256=8KEMVMHsmcoiK21Cq1-We6_Pw_6LX9qBX9Az4-tmj_w,2146
 opik_optimizer/utils/__init__.py,sha256=Ee0SnTPOcwRwp93M6Lh-X913lfSIwnvCiYYh5cpdRQE,486
 opik_optimizer/utils/colbert.py,sha256=qSrzKUUGw7P92mLy4Ofug5pBGeTsHBLMJXlXSJSfKuo,8147
 opik_optimizer/utils/core.py,sha256=5GT1vp6fW8ICO42LHMX14BjR-xEb6afAKjM7b1Evx5M,15298
 opik_optimizer/utils/dataset_utils.py,sha256=dqRUGOekjeNWL0J15R8xFwLyKJDJynJXzVyQmt8rhHA,1464
 opik_optimizer/utils/prompt_segments.py,sha256=1zUITSccJ82Njac1rmANzim4WWM6rVac61mfluS7lFE,5931
-opik_optimizer-2.0.1.dist-info/licenses/LICENSE,sha256=
-opik_optimizer-2.0.1.dist-info/METADATA,sha256=
-opik_optimizer-2.0.1.dist-info/WHEEL,sha256=
-opik_optimizer-2.0.1.dist-info/top_level.txt,sha256=
-opik_optimizer-2.0.1.dist-info/RECORD,,
+opik_optimizer-2.1.0.dist-info/licenses/LICENSE,sha256=V-0VHJOBdcA_teT8VymvsBUQ1-CZU6yJRmMEjec_8tA,11372
+opik_optimizer-2.1.0.dist-info/METADATA,sha256=4ZKzeqD4iIGwgS2CKYRlF7MYK5Zink37AVOU_D3U9gY,12821
+opik_optimizer-2.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+opik_optimizer-2.1.0.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
+opik_optimizer-2.1.0.dist-info/RECORD,,
{opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/WHEEL
File without changes
{opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/licenses/LICENSE
File without changes
{opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.0.dist-info}/top_level.txt
File without changes