opik-optimizer 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- opik_optimizer/__init__.py +2 -0
- opik_optimizer/base_optimizer.py +376 -19
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +80 -17
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +179 -39
- opik_optimizer/evolutionary_optimizer/llm_support.py +3 -1
- opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +17 -3
- opik_optimizer/evolutionary_optimizer/population_ops.py +5 -0
- opik_optimizer/evolutionary_optimizer/prompts.py +47 -0
- opik_optimizer/evolutionary_optimizer/reporting.py +12 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +65 -59
- opik_optimizer/gepa_optimizer/adapter.py +5 -3
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +163 -66
- opik_optimizer/mcp_utils/mcp_workflow.py +57 -3
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +75 -69
- opik_optimizer/mipro_optimizer/_lm.py +10 -3
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1 -1
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +96 -21
- opik_optimizer/optimizable_agent.py +5 -0
- opik_optimizer/optimization_result.py +1 -0
- opik_optimizer/utils/core.py +56 -14
- {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/METADATA +96 -9
- {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/RECORD +27 -26
- /opik_optimizer/{colbert.py → utils/colbert.py} +0 -0
- {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
--- a/opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py
+++ b/opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py
@@ -3,6 +3,7 @@ import json
 import logging
 import os
 import textwrap
+import warnings
 from typing import Any, cast
 from collections.abc import Callable
 
@@ -11,12 +12,10 @@ import opik
 from litellm.caching import Cache
 from litellm.types.caching import LiteLLMCacheType
 from opik import Dataset
-from opik.api_objects import opik_client
 from opik.environment import get_tqdm_for_current_environment
 from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
 
 from opik_optimizer import task_evaluator
-from ..utils.core import create_litellm_agent_class
 
 from .. import _throttle
 from ..base_optimizer import BaseOptimizer, OptimizationRound
@@ -143,6 +142,7 @@ class MetaPromptOptimizer(BaseOptimizer):
         verbose: int = 1,
         enable_context: bool = True,
         n_threads: int = 12,
+        seed: int = 42,
         **model_kwargs: Any,
     ) -> None:
         """
@@ -157,22 +157,28 @@ class MetaPromptOptimizer(BaseOptimizer):
         **model_kwargs: Additional model parameters
         """
         if "project_name" in model_kwargs:
-
-                "
+            warnings.warn(
+                "The 'project_name' parameter in optimizer constructor is deprecated. "
+                "Set project_name in the ChatPrompt instead.",
+                DeprecationWarning,
+                stacklevel=2,
             )
             del model_kwargs["project_name"]
 
-        super().__init__(model=model, verbose=verbose, **model_kwargs)
+        super().__init__(model=model, verbose=verbose, seed=seed, **model_kwargs)
         self.reasoning_model = reasoning_model if reasoning_model is not None else model
         self.rounds = rounds
         self.num_prompts_per_round = num_prompts_per_round
         if num_threads is not None:
-
+            warnings.warn(
+                "The 'num_threads' parameter is deprecated and will be removed in a future version. "
+                "Use 'n_threads' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             n_threads = num_threads
         self.num_threads = n_threads
         self.dataset: Dataset | None = None
-        self._opik_client = opik_client.get_client_cached()
-        self.llm_call_counter = 0
         self.enable_context = enable_context
         logger.debug(
             f"Initialized MetaPromptOptimizer with model={model}, reasoning_model={self.reasoning_model}"
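The two warnings above define the 2.0.0 migration path for this constructor. A minimal before/after sketch (the ChatPrompt keyword is inferred from the warning text; model names and message contents are placeholders):

    # 1.x style: both of these now emit DeprecationWarning
    optimizer = MetaPromptOptimizer(model="openai/gpt-4o", num_threads=8, project_name="my-project")

    # 2.0.0 style: n_threads on the optimizer, project_name on the ChatPrompt
    optimizer = MetaPromptOptimizer(model="openai/gpt-4o", n_threads=8, seed=42)
    prompt = ChatPrompt(project_name="my-project", messages=[{"role": "user", "content": "{question}"}])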
@@ -181,6 +187,14 @@ class MetaPromptOptimizer(BaseOptimizer):
             f"Optimization rounds: {rounds}, Prompts/round: {num_prompts_per_round}"
         )
 
+    def get_optimizer_metadata(self) -> dict[str, Any]:
+        return {
+            "rounds": self.rounds,
+            "num_prompts_per_round": self.num_prompts_per_round,
+            "reasoning_model": self.reasoning_model,
+            "enable_context": self.enable_context,
+        }
+
     @_throttle.rate_limited(_rate_limiter)
     def _call_model(
         self,
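A quick sketch of what the new hook returns (the numeric values below are placeholders, not documented defaults):

    optimizer = MetaPromptOptimizer(model="openai/gpt-4o", rounds=3, num_prompts_per_round=4)
    optimizer.get_optimizer_metadata()
    # e.g. {"rounds": 3, "num_prompts_per_round": 4,
    #       "reasoning_model": "openai/gpt-4o", "enable_context": True}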
@@ -190,7 +204,7 @@ class MetaPromptOptimizer(BaseOptimizer):
         optimization_id: str | None = None,
     ) -> str:
         """Call the model with the given prompt and return the response."""
-        self.
+        self.increment_llm_counter()
         # Note: Basic retry logic could be added here using tenacity
         try:
             # Basic LLM parameters (e.g., temperature, max_tokens)
@@ -321,25 +335,28 @@ class MetaPromptOptimizer(BaseOptimizer):
             subset_size = None  # Use all items for final checks
             logger.debug("Using full dataset for evaluation")
 
-
-
-
-
-
-
-
-
-            "
-            "
-
-
-
-
-
-
-
-
+        configuration_updates = self._drop_none(
+            {
+                "n_samples": subset_size,
+                "use_full_dataset": use_full_dataset,
+            }
+        )
+        meta_metadata = self._drop_none(
+            {
+                "optimization_id": optimization_id,
+                "stage": "trial_evaluation" if not use_full_dataset else "final_eval",
+            }
+        )
+        experiment_config = self._prepare_experiment_config(
+            prompt=prompt,
+            dataset=dataset,
+            metric=metric,
+            experiment_config=experiment_config,
+            configuration_updates=configuration_updates,
+            additional_metadata={"meta_prompt": meta_metadata}
+            if meta_metadata
+            else None,
+        )
 
         def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
             new_prompt = prompt.copy()
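The _drop_none helper is not shown in this diff; from its use above it reads as a BaseOptimizer utility that strips None-valued entries before they reach the experiment config. A sketch under that assumption:

    def _drop_none(self, values: dict[str, Any]) -> dict[str, Any]:
        # Assumed shape: keep only entries that carry a value so optional
        # settings (e.g. optimization_id=None) never pollute logged config.
        return {key: value for key, value in values.items() if value is not None}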
@@ -357,7 +374,7 @@ class MetaPromptOptimizer(BaseOptimizer):
             )
             raw_model_output = agent.llm_invoke(
                 messages=messages,
-                seed=
+                seed=self.seed,
                 allow_tool_use=True,
             )
         except Exception as exc:
@@ -391,7 +408,7 @@ class MetaPromptOptimizer(BaseOptimizer):
             )
             final_response = agent.llm_invoke(
                 messages=second_pass_messages,
-                seed=
+                seed=self.seed,
                 allow_tool_use=mcp_config.allow_tool_use_on_second_pass,
             )
         else:
@@ -459,36 +476,25 @@ class MetaPromptOptimizer(BaseOptimizer):
         Optimize a prompt using meta-reasoning.
 
         Args:
+            prompt: The prompt to optimize
             dataset: The dataset to evaluate against
             metric: The metric to use for evaluation
             experiment_config: A dictionary to log with the experiments
             n_samples: The number of dataset items to use for evaluation
             auto_continue: If True, the algorithm may continue if goal not met
-
+            agent_class: Optional agent class to use
+            **kwargs: Additional arguments for evaluation, including:
+                mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
+                candidate_generator: Optional candidate generator
+                candidate_generator_kwargs: Optional kwargs for candidate generator
 
         Returns:
             OptimizationResult: Structured result containing optimization details
         """
-
-
-
-
-            raise ValueError("Dataset must be a Dataset object")
-
-        if not callable(metric):
-            raise ValueError(
-                "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
-            )
-
-        if prompt.model is None:
-            prompt.model = self.model
-        if prompt.model_kwargs is None:
-            prompt.model_kwargs = self.model_kwargs
-
-        if agent_class is None:
-            self.agent_class = create_litellm_agent_class(prompt)
-        else:
-            self.agent_class = agent_class
+        # Use base class validation and setup methods
+        self.validate_optimization_inputs(prompt, dataset, metric)
+        self.configure_prompt_model(prompt)
+        self.agent_class = self.setup_agent_class(prompt, agent_class)
 
         total_items = len(dataset.get_items())
         if n_samples is not None and n_samples > total_items:
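The deleted inline checks show what the new BaseOptimizer helpers consolidate; reconstructed as a sketch from the removed lines (the actual bodies in base_optimizer.py are not part of this excerpt):

    def validate_optimization_inputs(self, prompt, dataset, metric) -> None:
        # Same checks that were previously inlined in each optimizer.
        if not isinstance(dataset, Dataset):
            raise ValueError("Dataset must be a Dataset object")
        if not callable(metric):
            raise ValueError(
                "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
            )

    def configure_prompt_model(self, prompt) -> None:
        # Fall back to the optimizer's model settings when the prompt has none.
        if prompt.model is None:
            prompt.model = self.model
        if prompt.model_kwargs is None:
            prompt.model_kwargs = self.model_kwargs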
@@ -499,7 +505,7 @@ class MetaPromptOptimizer(BaseOptimizer):
 
         optimization = None
         try:
-            optimization = self.
+            optimization = self.opik_client.create_optimization(
                 dataset_name=dataset.name,
                 objective_name=getattr(metric, "__name__", str(metric)),
                 metadata={"optimizer": self.__class__.__name__},
@@ -633,26 +639,25 @@ class MetaPromptOptimizer(BaseOptimizer):
         self.auto_continue = auto_continue
         self.dataset = dataset
         self.prompt = prompt
-        self.
+        self.reset_counters()  # Reset counters for run
         initial_prompt = prompt
 
         current_prompt = prompt
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            }
+        configuration_updates = self._drop_none(
+            {
+                "rounds": self.rounds,
+                "num_prompts_per_round": self.num_prompts_per_round,
+            }
+        )
+        meta_metadata = {"stage": "initial"}
+        experiment_config = self._prepare_experiment_config(
+            prompt=prompt,
+            dataset=dataset,
+            metric=metric,
+            experiment_config=experiment_config,
+            configuration_updates=configuration_updates,
+            additional_metadata={"meta_prompt": meta_metadata},
+        )
 
         with reporting.display_evaluation(verbose=self.verbose) as baseline_reporter:
             initial_score = self._evaluate_prompt(
@@ -887,6 +892,7 @@ class MetaPromptOptimizer(BaseOptimizer):
             metric_name=getattr(metric, "__name__", str(metric)),
             details=details,
             llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
             dataset_id=dataset_id,
             optimization_id=optimization_id,
             tool_prompts=tool_prompts,
--- a/opik_optimizer/mipro_optimizer/_lm.py
+++ b/opik_optimizer/mipro_optimizer/_lm.py
@@ -145,9 +145,16 @@ class LM(BaseLM):
         ):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
 
-        self.
+        self.increment_llm_counter()
         return results
 
+    def increment_llm_counter(self) -> None:
+        """Increment the LLM call counter."""
+        self.llm_call_counter += 1
+        parent = getattr(self, "parent_optimizer", None)
+        if parent is not None and hasattr(parent, "increment_llm_counter"):
+            parent.increment_llm_counter()
+
     def launch(self, launch_kwargs: dict[str, Any] | None = None):
         self.provider.launch(self, launch_kwargs)
 
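Combined with the setattr(self.lm, "parent_optimizer", self) wiring added in mipro_optimizer.py below, every DSPy-level completion now also increments the owning optimizer's counter. Roughly (object names are illustrative):

    lm = LM(model="openai/gpt-4o-mini")
    lm.parent_optimizer = optimizer   # done by MiproOptimizer.__init__ via setattr
    # each completion through lm now bumps both lm.llm_call_counter
    # and optimizer.llm_call_counter via the delegation above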
@@ -302,7 +309,7 @@ def request_cache(maxsize: int | None = None):
     return decorator
 
 
-@request_cache(maxsize=
+@request_cache(maxsize=2000)
 def cached_litellm_completion(request: dict[str, Any], num_retries: int):
     return litellm_completion(
         request,
@@ -361,7 +368,7 @@ def litellm_completion(
         return stream_completion()
 
 
-@request_cache(maxsize=
+@request_cache(maxsize=2000)
 def cached_litellm_text_completion(request: dict[str, Any], num_retries: int):
     return litellm_text_completion(
         request,
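Only the call sites of request_cache change here (an explicit maxsize of 2000); its body is not in this excerpt. A bounded request cache of this shape is plausibly a functools.lru_cache wrapper keyed on a serialized request, sketched below under that assumption:

    import functools
    import json
    from typing import Any, Callable

    def request_cache(maxsize: int | None = None) -> Callable:
        """Illustrative sketch: cache completions keyed on the serialized request."""

        def decorator(func: Callable) -> Callable:
            @functools.lru_cache(maxsize=maxsize)
            def cached(request_key: str, num_retries: int):
                return func(json.loads(request_key), num_retries)

            @functools.wraps(func)
            def wrapper(request: dict[str, Any], num_retries: int):
                # dicts are unhashable; serialize deterministically to form the key
                return cached(json.dumps(request, sort_keys=True), num_retries)

            return wrapper

        return decorator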
--- a/opik_optimizer/mipro_optimizer/mipro_optimizer.py
+++ b/opik_optimizer/mipro_optimizer/mipro_optimizer.py
@@ -1,7 +1,7 @@
 import os
 import random
 from datetime import datetime
-from typing import Literal
+from typing import Any, Literal
 from collections.abc import Callable
 import logging
 
@@ -15,9 +15,9 @@ from opik.integrations.dspy.callback import OpikCallback
 from opik.opik_context import get_current_span_data
 
 from ..optimization_result import OptimizationResult
-from ..utils import optimization_context
 from ..base_optimizer import BaseOptimizer
 from ..optimization_config.configs import TaskConfig
+from ..optimization_config import chat_prompt
 from ._lm import LM
 from ._mipro_optimizer_v2 import MIPROv2
 from .utils import (
@@ -45,14 +45,26 @@ class MiproOptimizer(BaseOptimizer):
         super().__init__(model=model, verbose=verbose, **model_kwargs)
         self.tools = []
         self.project_name = project_name
+        if "n_threads" in self.model_kwargs:
+            # To allow compatibility with other optimizers:
+            self.model_kwargs["num_threads"] = self.model_kwargs["n_threads"]
         self.num_threads = self.model_kwargs.pop("num_threads", 6)
         self.model_kwargs["model"] = self.model
         # FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
         self.lm = LM(**self.model_kwargs)
+        setattr(self.lm, "parent_optimizer", self)
         opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
         dspy.configure(lm=self.lm, callbacks=[opik_callback])
         logger.debug(f"Initialized MiproOptimizer with model: {model}")
 
+    def get_optimizer_metadata(self) -> dict[str, Any]:
+        return self._drop_none(
+            {
+                "project_name": self.project_name,
+                "num_threads": self.num_threads,
+            }
+        )
+
     def evaluate_prompt(
         self,
         dataset: str | Dataset,
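The effect of the new alias, sketched (model name illustrative): "n_threads" is copied onto "num_threads" before the pop, so both constructions leave the optimizer with the same thread count.

    opt_a = MiproOptimizer(model="openai/gpt-4o-mini", num_threads=8)
    opt_b = MiproOptimizer(model="openai/gpt-4o-mini", n_threads=8)
    assert opt_a.num_threads == opt_b.num_threads == 8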
@@ -84,7 +96,7 @@ class MiproOptimizer(BaseOptimizer):
         """
         # FIMXE: call super when it is ready
         # FIXME: Intermediate values:
-        self.
+        self.increment_llm_counter()
         input_key = task_config.input_dataset_fields[0]  # FIXME: allow all inputs
         output_key = task_config.output_dataset_field
 
@@ -239,23 +251,57 @@ class MiproOptimizer(BaseOptimizer):
 
     def optimize_prompt(
         self,
+        prompt: chat_prompt.ChatPrompt,
         dataset: str | Dataset,
         metric: Callable,
-        task_config: TaskConfig,
-        num_candidates: int = 10,
         experiment_config: dict | None = None,
-        num_trials: int | None = 3,
         n_samples: int | None = 10,
-
+        auto_continue: bool = False,
+        agent_class: str | None = None,
         **kwargs,
     ) -> OptimizationResult:
-
-
-
-
-
-
-
+        """
+        Optimize a prompt using MIPRO (Multi-Input Prompt Optimization).
+
+        Args:
+            prompt: The chat prompt to optimize
+            dataset: Opik dataset (or dataset name) containing evaluation data
+            metric: Evaluation function that takes (dataset_item, llm_output) and returns a score
+            experiment_config: Optional configuration for the experiment
+            n_samples: Number of samples to use for optimization (default: 10)
+            auto_continue: Whether to auto-continue optimization (default: False)
+            agent_class: Custom agent class to use (default: None)
+            **kwargs: Additional arguments including:
+                task_config: TaskConfig instance (required)
+                num_candidates: Number of candidates to generate (default: 10)
+                num_trials: Number of trials to run (default: 3)
+                auto: Optimization mode - "light", "medium", or "heavy" (default: "light")
+
+        Returns:
+            OptimizationResult: The optimization result containing the optimized prompt and metrics
+
+        Raises:
+            ValueError: If task_config is not provided
+        """
+        # Resolve dataset names to Dataset objects for validation compatibility
+        if isinstance(dataset, str):
+            dataset_name = dataset
+            client = opik.Opik(project_name=self.project_name)
+            dataset = client.get_dataset(dataset_name)
+
+        # Use base class validation and setup methods
+        self.validate_optimization_inputs(prompt, dataset, metric)
+
+        # Extract MIPRO-specific parameters from kwargs
+        task_config = kwargs.pop("task_config", None)
+        if task_config is None:
+            raise ValueError("task_config is required for MiproOptimizer")
+
+        num_candidates = kwargs.pop("num_candidates", 10)
+        num_trials = kwargs.pop("num_trials", 3)
+        auto = kwargs.pop("auto", "light")
+
+        with self.create_optimization_context(dataset, metric) as optimization:
             result = self._optimize_prompt(
                 dataset=dataset,
                 metric=metric,
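Putting the new signature together, a sketch of a 2.0.0-style call (the dataset, metric, task_config, and prompt contents are placeholders):

    result = MiproOptimizer(model="openai/gpt-4o-mini").optimize_prompt(
        prompt=chat_prompt.ChatPrompt(messages=[{"role": "user", "content": "{question}"}]),
        dataset=dataset,
        metric=my_metric,
        task_config=task_config,   # moved out of the signature into **kwargs, but still required
        num_candidates=10,         # other MIPRO knobs also travel through **kwargs
        num_trials=3,
        auto="light",
    )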
@@ -315,19 +361,18 @@ class MiproOptimizer(BaseOptimizer):
         **kwargs,
     ) -> None:
         # FIXME: Intermediate values:
-        self.
+        self.reset_counters()  # Reset counters for run
         prompt = task_config.instruction_prompt
         input_key = task_config.input_dataset_fields[0]  # FIXME: allow all
         output_key = task_config.output_dataset_field
         self.tools = task_config.tools
         self.num_candidates = num_candidates
-        self.
+        self.auto = auto
         self.input_key = input_key
         self.output_key = output_key
         self.prompt = prompt
         self.num_trials = num_trials
         self.n_samples = n_samples
-        self.auto = auto
 
         # Convert to values for MIPRO:
         if isinstance(dataset, str):
@@ -396,6 +441,19 @@ class MiproOptimizer(BaseOptimizer):
         logger.debug(f"Using DSPy module: {type(self.module).__name__}")
         logger.debug(f"Using metric function: {self.metric_function.__name__}")
 
+    def cleanup(self) -> None:
+        """
+        Clean up MIPRO-specific resources.
+        """
+        # Call parent cleanup
+        super().cleanup()
+
+        # Clear MIPRO-specific resources
+        self.tools = None
+        self.prompt = None
+
+        logger.debug("Cleaned up MIPRO-specific resources")
+
     def load_from_checkpoint(self, filename):
         """
         Load the module from a checkpoint.
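Since cleanup() now releases MIPRO-specific state on top of the parent teardown, callers that reuse a process might pair it with try/finally (usage pattern assumed, not shown in the diff):

    optimizer = MiproOptimizer(model="openai/gpt-4o-mini")
    try:
        result = optimizer.optimize_prompt(
            prompt=prompt, dataset=dataset, metric=my_metric, task_config=task_config
        )
    finally:
        optimizer.cleanup()  # clears self.tools / self.prompt, then BaseOptimizer cleanup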
@@ -516,7 +574,8 @@ class MiproOptimizer(BaseOptimizer):
             ),
             details={"error": "No candidate programs generated by MIPRO"},
             history=mipro_history_processed,
-            llm_calls=self.
+            llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
         )
 
         self.module = self.get_best().details["program"]
@@ -548,7 +607,8 @@ class MiproOptimizer(BaseOptimizer):
             demonstrations=best_program_details.demonstrations,
             details=best_program_details.details,
             history=mipro_history_processed,
-            llm_calls=self.
+            llm_calls=self.llm_call_counter,
+            tool_calls=self.tool_call_counter,
         )
 
     def get_best(self, position: int = 0) -> OptimizationResult:
@@ -556,6 +616,14 @@ class MiproOptimizer(BaseOptimizer):
             logger.error(
                 "get_best() called but no best_programs found. MIPRO compile might have failed or yielded no results."
             )
+            # Get LLM call count from the optimizer if available
+            dspy_llm_calls = (
+                getattr(self.optimizer, "total_calls", 0)
+                if hasattr(self, "optimizer") and self.optimizer
+                else 0
+            )
+            actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
             return OptimizationResult(
                 optimizer="MiproOptimizer",
                 prompt=[
@@ -574,7 +642,8 @@ class MiproOptimizer(BaseOptimizer):
                 ),
                 details={"error": "No programs generated or compile failed"},
                 history=[],
-                llm_calls=
+                llm_calls=actual_llm_calls,
+                tool_calls=self.tool_call_counter,
             )
 
         score = self.best_programs[position]["score"]
@@ -592,6 +661,11 @@ class MiproOptimizer(BaseOptimizer):
         best_prompt = state["signature"]["instructions"]
         demos = [x.toDict() for x in state["demos"]]
 
+        # Get LLM call count from the DSPy program module
+        dspy_llm_calls = getattr(program_module, "total_calls", 0)
+        # Use the higher of our counter or DSPy's counter
+        actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
         print(best_prompt)
         return OptimizationResult(
             optimizer="MiproOptimizer",
@@ -601,5 +675,6 @@ class MiproOptimizer(BaseOptimizer):
             metric_name=self.opik_metric.__name__,
             demonstrations=demos,
             details={"program": program_module},
-            llm_calls=
+            llm_calls=actual_llm_calls,
+            tool_calls=self.tool_call_counter,
         )
--- a/opik_optimizer/optimizable_agent.py
+++ b/opik_optimizer/optimizable_agent.py
@@ -147,6 +147,11 @@ class OptimizableAgent:
                         "content": str(tool_result),
                     }
                 )
+                # Increment tool call counter if we have access to the optimizer
+                if hasattr(self, "optimizer") and hasattr(
+                    self.optimizer, "increment_tool_counter"
+                ):
+                    self.optimizer.increment_tool_counter()
             else:
                 final_response = msg["content"]
                 break
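The increment_tool_counter target lives in base_optimizer.py (changed in this release but not excerpted here); given the LLM-counter pattern in _lm.py above, it plausibly reduces to:

    def increment_tool_counter(self) -> None:
        # Assumed shape: count one tool invocation made on this optimizer's behalf.
        self.tool_call_counter += 1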
--- a/opik_optimizer/optimization_result.py
+++ b/opik_optimizer/optimization_result.py
@@ -27,6 +27,7 @@ class OptimizationResult(pydantic.BaseModel):
     details: dict[str, Any] = pydantic.Field(default_factory=dict)
     history: list[dict[str, Any]] = []
     llm_calls: int | None = None
+    tool_calls: int | None = None
 
     # MIPRO specific
     demonstrations: list[dict[str, Any]] | None = None