opik-optimizer 1.0.6__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in the public registry. It is provided for informational purposes only.
- opik_optimizer/__init__.py +4 -0
- opik_optimizer/_throttle.py +2 -1
- opik_optimizer/base_optimizer.py +402 -28
- opik_optimizer/data/context7_eval.jsonl +3 -0
- opik_optimizer/datasets/context7_eval.py +90 -0
- opik_optimizer/datasets/tiny_test.py +33 -34
- opik_optimizer/datasets/truthful_qa.py +2 -2
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
- opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
- opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
- opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
- opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
- opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
- opik_optimizer/gepa_optimizer/__init__.py +3 -0
- opik_optimizer/gepa_optimizer/adapter.py +154 -0
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
- opik_optimizer/gepa_optimizer/reporting.py +181 -0
- opik_optimizer/logging_config.py +42 -7
- opik_optimizer/mcp_utils/__init__.py +22 -0
- opik_optimizer/mcp_utils/mcp.py +541 -0
- opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
- opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
- opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
- opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
- opik_optimizer/mipro_optimizer/_lm.py +30 -23
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
- opik_optimizer/mipro_optimizer/utils.py +2 -4
- opik_optimizer/optimizable_agent.py +21 -16
- opik_optimizer/optimization_config/chat_prompt.py +44 -23
- opik_optimizer/optimization_config/configs.py +3 -3
- opik_optimizer/optimization_config/mappers.py +9 -8
- opik_optimizer/optimization_result.py +22 -14
- opik_optimizer/reporting_utils.py +61 -10
- opik_optimizer/task_evaluator.py +9 -8
- opik_optimizer/utils/__init__.py +15 -0
- opik_optimizer/utils/colbert.py +236 -0
- opik_optimizer/{utils.py → utils/core.py} +160 -33
- opik_optimizer/utils/dataset_utils.py +49 -0
- opik_optimizer/utils/prompt_segments.py +186 -0
- opik_optimizer-2.0.0.dist-info/METADATA +345 -0
- opik_optimizer-2.0.0.dist-info/RECORD +74 -0
- opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
- opik_optimizer-1.0.6.dist-info/METADATA +0 -181
- opik_optimizer-1.0.6.dist-info/RECORD +0 -50
- opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/mipro_optimizer/mipro_optimizer.py

@@ -1,7 +1,8 @@
 import os
 import random
 from datetime import datetime
-from typing import
+from typing import Any, Literal
+from collections.abc import Callable
 import logging

 import dspy
@@ -14,9 +15,9 @@ from opik.integrations.dspy.callback import OpikCallback
 from opik.opik_context import get_current_span_data

 from ..optimization_result import OptimizationResult
-from ..utils import optimization_context
 from ..base_optimizer import BaseOptimizer
 from ..optimization_config.configs import TaskConfig
+from ..optimization_config import chat_prompt
 from ._lm import LM
 from ._mipro_optimizer_v2 import MIPROv2
 from .utils import (
@@ -37,30 +38,42 @@ class MiproOptimizer(BaseOptimizer):
     def __init__(
         self,
         model,
-        project_name:
+        project_name: str | None = None,
         verbose: int = 1,
         **model_kwargs,
     ):
         super().__init__(model=model, verbose=verbose, **model_kwargs)
         self.tools = []
         self.project_name = project_name
+        if "n_threads" in self.model_kwargs:
+            # To allow compatibility with other optimizers:
+            self.model_kwargs["num_threads"] = self.model_kwargs["n_threads"]
         self.num_threads = self.model_kwargs.pop("num_threads", 6)
         self.model_kwargs["model"] = self.model
         # FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
         self.lm = LM(**self.model_kwargs)
+        setattr(self.lm, "parent_optimizer", self)
         opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
         dspy.configure(lm=self.lm, callbacks=[opik_callback])
         logger.debug(f"Initialized MiproOptimizer with model: {model}")

+    def get_optimizer_metadata(self) -> dict[str, Any]:
+        return self._drop_none(
+            {
+                "project_name": self.project_name,
+                "num_threads": self.num_threads,
+            }
+        )
+
     def evaluate_prompt(
         self,
-        dataset:
+        dataset: str | Dataset,
         metric: Callable,
         task_config: TaskConfig,
-        prompt:
+        prompt: str | dspy.Module | OptimizationResult | None = None,
         n_samples: int = 10,
-        dataset_item_ids:
-        experiment_config:
+        dataset_item_ids: list[str] | None = None,
+        experiment_config: dict | None = None,
         verbose: int = 1,
         **kwargs,
     ) -> float:
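A minimal construction sketch based on the new __init__ above; the model name is a placeholder, and it assumes MiproOptimizer is still exported from the package root:

from opik_optimizer import MiproOptimizer

# "n_threads" is now accepted for parity with the other optimizers and is
# copied into "num_threads" before the DSPy LM wrapper is built.
optimizer = MiproOptimizer(
    model="openai/gpt-4o-mini",
    project_name="my-project",
    n_threads=8,
)
print(optimizer.get_optimizer_metadata())  # {'project_name': 'my-project', 'num_threads': 8}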
@@ -83,7 +96,7 @@ class MiproOptimizer(BaseOptimizer):
         """
         # FIMXE: call super when it is ready
         # FIXME: Intermediate values:
-        self.
+        self.increment_llm_counter()
         input_key = task_config.input_dataset_fields[0]  # FIXME: allow all inputs
         output_key = task_config.output_dataset_field

@@ -238,23 +251,57 @@ class MiproOptimizer(BaseOptimizer):

     def optimize_prompt(
         self,
-
+        prompt: chat_prompt.ChatPrompt,
+        dataset: str | Dataset,
         metric: Callable,
-
-
-
-
-        n_samples: Optional[int] = 10,
-        auto: Optional[Literal["light", "medium", "heavy"]] = "light",
+        experiment_config: dict | None = None,
+        n_samples: int | None = 10,
+        auto_continue: bool = False,
+        agent_class: str | None = None,
         **kwargs,
     ) -> OptimizationResult:
-
-
-
-
-
-
-
+        """
+        Optimize a prompt using MIPRO (Multi-Input Prompt Optimization).
+
+        Args:
+            prompt: The chat prompt to optimize
+            dataset: Opik dataset (or dataset name) containing evaluation data
+            metric: Evaluation function that takes (dataset_item, llm_output) and returns a score
+            experiment_config: Optional configuration for the experiment
+            n_samples: Number of samples to use for optimization (default: 10)
+            auto_continue: Whether to auto-continue optimization (default: False)
+            agent_class: Custom agent class to use (default: None)
+            **kwargs: Additional arguments including:
+                task_config: TaskConfig instance (required)
+                num_candidates: Number of candidates to generate (default: 10)
+                num_trials: Number of trials to run (default: 3)
+                auto: Optimization mode - "light", "medium", or "heavy" (default: "light")
+
+        Returns:
+            OptimizationResult: The optimization result containing the optimized prompt and metrics
+
+        Raises:
+            ValueError: If task_config is not provided
+        """
+        # Resolve dataset names to Dataset objects for validation compatibility
+        if isinstance(dataset, str):
+            dataset_name = dataset
+            client = opik.Opik(project_name=self.project_name)
+            dataset = client.get_dataset(dataset_name)
+
+        # Use base class validation and setup methods
+        self.validate_optimization_inputs(prompt, dataset, metric)
+
+        # Extract MIPRO-specific parameters from kwargs
+        task_config = kwargs.pop("task_config", None)
+        if task_config is None:
+            raise ValueError("task_config is required for MiproOptimizer")
+
+        num_candidates = kwargs.pop("num_candidates", 10)
+        num_trials = kwargs.pop("num_trials", 3)
+        auto = kwargs.pop("auto", "light")
+
+        with self.create_optimization_context(dataset, metric) as optimization:
             result = self._optimize_prompt(
                 dataset=dataset,
                 metric=metric,
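A usage sketch that follows the docstring above; the dataset name, metric, and model are placeholders, and ChatPrompt/TaskConfig are assumed to be re-exported from the package root:

from opik_optimizer import ChatPrompt, MiproOptimizer, TaskConfig

def exact_match(dataset_item, llm_output):
    # metric signature per the docstring: (dataset_item, llm_output) -> score
    return float(dataset_item["answer"].strip() == llm_output.strip())

prompt = ChatPrompt(system="Answer concisely.", user="{question}")
task_config = TaskConfig(
    instruction_prompt="Answer concisely.",
    input_dataset_fields=["question"],
    output_dataset_field="answer",
)

result = MiproOptimizer(model="openai/gpt-4o-mini").optimize_prompt(
    prompt=prompt,
    dataset="my-dataset",       # names are resolved via opik.Opik().get_dataset()
    metric=exact_match,
    n_samples=10,
    task_config=task_config,    # required; passed through **kwargs
    num_candidates=10,
    num_trials=3,
    auto="light",
)
print(result.score, result.llm_calls, result.tool_calls)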
@@ -271,15 +318,15 @@ class MiproOptimizer(BaseOptimizer):

     def _optimize_prompt(
         self,
-        dataset:
+        dataset: str | Dataset,
         metric: Callable,
         task_config: TaskConfig,
         num_candidates: int = 10,
-        experiment_config:
-        optimization_id:
-        num_trials:
-        n_samples:
-        auto:
+        experiment_config: dict | None = None,
+        optimization_id: str | None = None,
+        num_trials: int | None = 3,
+        n_samples: int | None = 10,
+        auto: Literal["light", "medium", "heavy"] | None = "light",
         **kwargs,
     ) -> OptimizationResult:
         logger.info("Preparing MIPRO optimization...")
@@ -306,27 +353,26 @@ class MiproOptimizer(BaseOptimizer):
         metric,
         task_config,
         num_candidates: int = 10,
-        experiment_config:
-        optimization_id:
-        num_trials:
-        n_samples:
-        auto:
+        experiment_config: dict | None = None,
+        optimization_id: str | None = None,
+        num_trials: int | None = 3,
+        n_samples: int | None = 10,
+        auto: Literal["light", "medium", "heavy"] | None = "light",
         **kwargs,
     ) -> None:
         # FIXME: Intermediate values:
-        self.
+        self.reset_counters()  # Reset counters for run
         prompt = task_config.instruction_prompt
         input_key = task_config.input_dataset_fields[0]  # FIXME: allow all
         output_key = task_config.output_dataset_field
         self.tools = task_config.tools
         self.num_candidates = num_candidates
-        self.
+        self.auto = auto
         self.input_key = input_key
         self.output_key = output_key
         self.prompt = prompt
         self.num_trials = num_trials
         self.n_samples = n_samples
-        self.auto = auto

         # Convert to values for MIPRO:
         if isinstance(dataset, str):
@@ -395,6 +441,19 @@ class MiproOptimizer(BaseOptimizer):
         logger.debug(f"Using DSPy module: {type(self.module).__name__}")
         logger.debug(f"Using metric function: {self.metric_function.__name__}")

+    def cleanup(self) -> None:
+        """
+        Clean up MIPRO-specific resources.
+        """
+        # Call parent cleanup
+        super().cleanup()
+
+        # Clear MIPRO-specific resources
+        self.tools = None
+        self.prompt = None
+
+        logger.debug("Cleaned up MIPRO-specific resources")
+
     def load_from_checkpoint(self, filename):
         """
         Load the module from a checkpoint.
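The intended lifecycle for the new cleanup() hook, reusing the names from the sketch above; it chains to BaseOptimizer.cleanup() and then drops the MIPRO-specific tools/prompt references:

optimizer = MiproOptimizer(model="openai/gpt-4o-mini")
try:
    result = optimizer.optimize_prompt(
        prompt=prompt,
        dataset="my-dataset",
        metric=exact_match,
        task_config=task_config,
    )
finally:
    optimizer.cleanup()  # release per-run state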
@@ -508,12 +567,15 @@ class MiproOptimizer(BaseOptimizer):
                 }
             ],
             score=0.0,
-            metric_name=
-
-
+            metric_name=(
+                self.opik_metric.__name__
+                if hasattr(self, "opik_metric")
+                else "unknown_metric"
+            ),
             details={"error": "No candidate programs generated by MIPRO"},
             history=mipro_history_processed,
-            llm_calls=self.
+            llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
         )

         self.module = self.get_best().details["program"]
@@ -545,7 +607,8 @@ class MiproOptimizer(BaseOptimizer):
             demonstrations=best_program_details.demonstrations,
             details=best_program_details.details,
             history=mipro_history_processed,
-            llm_calls=self.
+            llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
         )

     def get_best(self, position: int = 0) -> OptimizationResult:
@@ -553,6 +616,14 @@ class MiproOptimizer(BaseOptimizer):
             logger.error(
                 "get_best() called but no best_programs found. MIPRO compile might have failed or yielded no results."
             )
+            # Get LLM call count from the optimizer if available
+            dspy_llm_calls = (
+                getattr(self.optimizer, "total_calls", 0)
+                if hasattr(self, "optimizer") and self.optimizer
+                else 0
+            )
+            actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
             return OptimizationResult(
                 optimizer="MiproOptimizer",
                 prompt=[
@@ -564,12 +635,15 @@ class MiproOptimizer(BaseOptimizer):
                     }
                 ],
                 score=0.0,
-                metric_name=
-
-
+                metric_name=(
+                    getattr(self, "opik_metric", None).name
+                    if hasattr(self, "opik_metric") and self.opik_metric
+                    else "unknown_metric"
+                ),
                 details={"error": "No programs generated or compile failed"},
                 history=[],
-                llm_calls=
+                llm_calls=actual_llm_calls,
+                tool_calls=self.tool_call_counter,
             )

         score = self.best_programs[position]["score"]
@@ -587,6 +661,11 @@ class MiproOptimizer(BaseOptimizer):
         best_prompt = state["signature"]["instructions"]
         demos = [x.toDict() for x in state["demos"]]

+        # Get LLM call count from the DSPy program module
+        dspy_llm_calls = getattr(program_module, "total_calls", 0)
+        # Use the higher of our counter or DSPy's counter
+        actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
         print(best_prompt)
         return OptimizationResult(
             optimizer="MiproOptimizer",
@@ -596,5 +675,6 @@ class MiproOptimizer(BaseOptimizer):
             metric_name=self.opik_metric.__name__,
             demonstrations=demos,
             details={"program": program_module},
-            llm_calls=
+            llm_calls=actual_llm_calls,
+            tool_calls=self.tool_call_counter,
         )
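For reference, a sketch of reading the result fields populated in the hunks above; the attribute names are taken from the OptimizationResult(...) calls in this file:

best = optimizer.get_best()               # position=0 selects the top-ranked candidate
print(best.score, best.metric_name)
print(best.llm_calls, best.tool_calls)    # LLM count is now reconciled with DSPy's total_calls
program = best.details["program"]         # the underlying dspy.Module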
opik_optimizer/mipro_optimizer/utils.py

@@ -1,5 +1,3 @@
-from typing import Dict, Optional
-
 import uuid
 import dspy
 import re
@@ -61,7 +59,7 @@ def opik_metric_to_dspy(metric, output):


 def create_dspy_training_set(
-    data: list[dict], input: str, n_samples:
+    data: list[dict], input: str, n_samples: int | None = None
 ) -> list[dspy.Example]:
     """
     Turn a list of dicts into a list of dspy Examples
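An illustrative call (the rows are hard-coded here; inside the optimizer they come from the Opik dataset), assuming the helper marks the given input field on each dspy.Example:

rows = [
    {"id": "1", "question": "What is 2 + 2?", "answer": "4"},
    {"id": "2", "question": "What is the capital of France?", "answer": "Paris"},
]
examples = create_dspy_training_set(rows, input="question", n_samples=1)
# -> a list of dspy.Example objects, at most n_samples long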
@@ -80,7 +78,7 @@ create_dspy_training_set(
     return output


-def get_tool_prompts(tool_names, text: str) ->
+def get_tool_prompts(tool_names, text: str) -> dict[str, str]:
     """
     Extract the embedded tool prompts from a text.
     """
opik_optimizer/optimizable_agent.py

@@ -1,4 +1,4 @@
-from typing import
+from typing import Any, TYPE_CHECKING
 import json
 import os

@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     from .optimization_config.chat_prompt import ChatPrompt


-def tools_to_dict(tools:
+def tools_to_dict(tools: dict[str, dict[str, Any]]) -> dict[str, Any]:
     retval = {}
     for name in tools:
         parts = {}
@@ -38,11 +38,11 @@ class OptimizableAgent:
        project_name (Optional[str]): The project name for tracking
    """

-    model:
-    model_kwargs:
-    project_name:
-    input_dataset_field:
-    prompts:
+    model: str | None = None
+    model_kwargs: dict[str, Any] = {}
+    project_name: str | None = "Default Project"
+    input_dataset_field: str | None = None
+    prompts: dict[str, "ChatPrompt"]
     prompt: "ChatPrompt"

     def __init__(self, prompt: "ChatPrompt") -> None:
@@ -71,8 +71,8 @@ class OptimizableAgent:
     @_throttle.rate_limited(_limiter)
     def _llm_complete(
         self,
-        messages:
-        tools:
+        messages: list[dict[str, str]],
+        tools: list[dict[str, str]] | None,
         seed: int,
     ) -> Any:
         response = litellm.completion(
@@ -91,10 +91,10 @@ class OptimizableAgent:

     def llm_invoke(
         self,
-        query:
-        messages:
-        seed:
-        allow_tool_use:
+        query: str | None = None,
+        messages: list[dict[str, str]] | None = None,
+        seed: int | None = None,
+        allow_tool_use: bool | None = False,
     ) -> str:
         """
         NOTE: this is the default LiteLLM API. It is used
@@ -147,6 +147,11 @@ class OptimizableAgent:
                         "content": str(tool_result),
                     }
                 )
+                # Increment tool call counter if we have access to the optimizer
+                if hasattr(self, "optimizer") and hasattr(
+                    self.optimizer, "increment_tool_counter"
+                ):
+                    self.optimizer.increment_tool_counter()
             else:
                 final_response = msg["content"]
                 break
@@ -156,14 +161,14 @@ class OptimizableAgent:
         result = response.choices[0].message.content
         return result

-    def invoke_dataset_item(self, dataset_item:
+    def invoke_dataset_item(self, dataset_item: dict[str, str]) -> str:
         messages = self.prompt.get_messages(dataset_item)
         return self.invoke(messages)

     def invoke(
         self,
-        messages:
-        seed:
+        messages: list[dict[str, str]],
+        seed: int | None = None,
     ) -> str:
         """
         Invoke the agent with a dataset item.
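A sketch of the default LiteLLM-backed agent loop; the model name and dataset item are placeholders, and it assumes ChatPrompt placeholders such as {question} are filled from the dataset item by get_messages():

from opik_optimizer import ChatPrompt
from opik_optimizer.optimizable_agent import OptimizableAgent

class TerseAgent(OptimizableAgent):
    model = "openai/gpt-4o-mini"      # class attributes shown in the hunk above
    project_name = "my-project"

prompt = ChatPrompt(system="Answer in one word.", user="{question}")
agent = TerseAgent(prompt)
print(agent.invoke_dataset_item({"question": "What is 2 + 2?"}))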
opik_optimizer/optimization_config/chat_prompt.py

@@ -1,4 +1,5 @@
-from typing import Any
+from typing import Any
+from collections.abc import Callable

 import copy

@@ -10,7 +11,7 @@ from opik import track
 class Tool(BaseModel):
     name: str = Field(..., description="Name of the tool")
     description: str = Field(..., description="Description of the tool")
-    parameters:
+    parameters: dict[str, Any] = Field(
         ..., description="JSON Schema defining the input parameters for the tool"
     )

@@ -33,14 +34,14 @@ class ChatPrompt:
     def __init__(
         self,
         name: str = "chat-prompt",
-        system:
-        user:
-        messages:
-        tools:
-        function_map:
-        model:
-        invoke:
-        project_name:
+        system: str | None = None,
+        user: str | None = None,
+        messages: list[dict[str, str]] | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        function_map: dict[str, Callable] | None = None,
+        model: str | None = None,
+        invoke: Callable | None = None,
+        project_name: str | None = "Default Project",
         **model_kwargs: Any,
     ) -> None:
         if system is None and user is None and messages is None:
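The constructor above accepts either split system/user strings or a full message list (at least one of the three must be given); extra keyword arguments are collected into model_kwargs. A small sketch, with {question} assumed to be a dataset placeholder:

from opik_optimizer import ChatPrompt

p1 = ChatPrompt(system="You are helpful.", user="{question}")
p2 = ChatPrompt(
    messages=[
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "{question}"},
    ]
)
p3 = ChatPrompt(user="{question}", model="openai/gpt-4o-mini", temperature=0.0)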
@@ -97,8 +98,8 @@ class ChatPrompt:

     def get_messages(
         self,
-        dataset_item:
-    ) ->
+        dataset_item: dict[str, str] | None = None,
+    ) -> list[dict[str, str]]:
         # This is a copy, so we can alter the messages:
         messages = self._standardize_prompts()

@@ -113,8 +114,8 @@ class ChatPrompt:
         )
         return messages

-    def _standardize_prompts(self, **kwargs: Any) ->
-        standardize_messages:
+    def _standardize_prompts(self, **kwargs: Any) -> list[dict[str, str]]:
+        standardize_messages: list[dict[str, str]] = []

         if self.system is not None:
             standardize_messages.append({"role": "system", "content": self.system})
@@ -128,13 +129,13 @@ class ChatPrompt:

         return copy.deepcopy(standardize_messages)

-    def to_dict(self) ->
+    def to_dict(self) -> dict[str, str | list[dict[str, str]]]:
         """Convert ChatPrompt to a dictionary for JSON serialization.

         Returns:
             Dict containing the serializable representation of this ChatPrompt
         """
-        retval:
+        retval: dict[str, str | list[dict[str, str]]] = {}
         if self.system is not None:
             retval["system"] = self.system
         if self.user is not None:
@@ -144,29 +145,49 @@ class ChatPrompt:
         return retval

     def copy(self) -> "ChatPrompt":
+        """Shallow clone preserving model configuration and tools."""
+
+        # TODO(opik-mcp): once we introduce a dedicated MCP prompt subclass,
+        # migrate callers away from generic copies so optimizer metadata stays typed.
+        model_kwargs = (
+            copy.deepcopy(self.model_kwargs) if self.model_kwargs is not None else {}
+        )
         return ChatPrompt(
+            name=self.name,
             system=self.system,
             user=self.user,
             messages=copy.deepcopy(self.messages),
-            tools=self.tools,
+            tools=copy.deepcopy(self.tools),
             function_map=self.function_map,
+            model=self.model,
+            invoke=self.invoke,
+            project_name=self.project_name,
+            **model_kwargs,
         )

-    def set_messages(self, messages:
+    def set_messages(self, messages: list[dict[str, Any]]) -> None:
         self.system = None
         self.user = None
         self.messages = copy.deepcopy(messages)

+    # TODO(opik): remove this stop-gap once MetaPromptOptimizer supports MCP.
+    # Provides a second-pass flow so tool results can be appended before
+    # rerunning the model.
+    def with_messages(self, messages: list[dict[str, Any]]) -> "ChatPrompt":
+        cloned = self.copy()
+        cloned.set_messages(messages)
+        return cloned
+
     @classmethod
     def model_validate(
         cls,
         obj: Any,
         *,
-        strict:
-        from_attributes:
-        context:
-        by_alias:
-        by_name:
+        strict: bool | None = None,
+        from_attributes: bool | None = None,
+        context: Any | None = None,
+        by_alias: bool | None = None,
+        by_name: bool | None = None,
     ) -> "ChatPrompt":
         """Custom validation method to handle nested objects during deserialization."""
         return ChatPrompt(
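A sketch of the new second-pass helper: with_messages() clones the prompt and swaps in an extended transcript (for example with tool results appended), while copy() now carries model, invoke and project_name across. Placeholder handling is assumed to behave as in the earlier examples:

base = ChatPrompt(system="You are helpful.", user="{question}", model="openai/gpt-4o-mini")
followup = base.with_messages(
    base.get_messages({"question": "What is 2 + 2?"})
    + [{"role": "assistant", "content": "4"}]
)
assert followup.model == base.model   # preserved by the updated copy()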
opik_optimizer/optimization_config/configs.py

@@ -1,6 +1,6 @@
 """Module containing configuration classes for optimization."""

-from typing import Any
+from typing import Any

 import pydantic

@@ -12,6 +12,6 @@ class TaskConfig(pydantic.BaseModel):

     instruction_prompt: str
     use_chat_prompt: bool = False
-    input_dataset_fields:
+    input_dataset_fields: list[str]
     output_dataset_field: str
-    tools:
+    tools: list[Any] = []
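A field-for-field example of the pydantic model above (values are illustrative); TaskConfig is assumed to be importable from the package root, otherwise import it from opik_optimizer.optimization_config.configs:

from opik_optimizer import TaskConfig

task_config = TaskConfig(
    instruction_prompt="Answer the question in one sentence.",
    input_dataset_fields=["question"],
    output_dataset_field="answer",
    use_chat_prompt=False,
    tools=[],
)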
opik_optimizer/optimization_config/mappers.py

@@ -1,4 +1,5 @@
-from typing import
+from typing import Any
+from collections.abc import Callable

 EVALUATED_LLM_TASK_OUTPUT = "llm_output"

@@ -8,8 +9,8 @@ class Mapper:

     def __init__(
         self,
-        name:
-        transform:
+        name: str | None = None,
+        transform: Callable[[Any], Any] | None = None,
     ):
         if name is not None and transform is not None:
             raise ValueError("Only one of name or transform can be provided")
@@ -27,9 +28,9 @@ class Mapper:

 def from_dataset_field(
     *,
-    name:
-    transform:
-) ->
+    name: str | None = None,
+    transform: Callable[[dict[str, Any]], Any] | None = None,
+) -> str | Callable[[dict[str, Any]], Any]:
     if name is not None and transform is not None:
         raise ValueError("Only one of name or transform can be provided")

@@ -47,8 +48,8 @@ def from_llm_response_text() -> str:


 def from_agent_output(
-    *, name:
-) ->
+    *, name: str | None = None, transform: Callable[[Any], Any] | None = None
+) -> str | Callable[[Any], Any]:
     if name is not None and transform is not None:
         raise ValueError("Only one of name or transform can be provided")

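A sketch of the name-or-transform contract shared by these mapper helpers: exactly one of the two may be supplied, and the helper returns either the dataset field name or a callable to apply:

from opik_optimizer.optimization_config.mappers import (
    from_agent_output,
    from_dataset_field,
)

input_mapper = from_dataset_field(name="question")
output_mapper = from_agent_output(transform=lambda output: output.strip())
# from_dataset_field(name="question", transform=...) raises ValueError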