opik-optimizer 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. opik_optimizer/__init__.py +2 -0
  2. opik_optimizer/base_optimizer.py +376 -19
  3. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +80 -17
  4. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +179 -39
  5. opik_optimizer/evolutionary_optimizer/llm_support.py +3 -1
  6. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +17 -3
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +5 -0
  9. opik_optimizer/evolutionary_optimizer/prompts.py +47 -0
  10. opik_optimizer/evolutionary_optimizer/reporting.py +12 -0
  11. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +65 -59
  12. opik_optimizer/gepa_optimizer/adapter.py +5 -3
  13. opik_optimizer/gepa_optimizer/gepa_optimizer.py +163 -66
  14. opik_optimizer/mcp_utils/mcp_workflow.py +57 -3
  15. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +75 -69
  16. opik_optimizer/mipro_optimizer/_lm.py +10 -3
  17. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +1 -1
  18. opik_optimizer/mipro_optimizer/mipro_optimizer.py +96 -21
  19. opik_optimizer/optimizable_agent.py +5 -0
  20. opik_optimizer/optimization_result.py +1 -0
  21. opik_optimizer/utils/core.py +56 -14
  22. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/METADATA +96 -9
  23. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/RECORD +27 -26
  24. /opik_optimizer/{colbert.py → utils/colbert.py} +0 -0
  25. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  26. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/licenses/LICENSE +0 -0
  27. {opik_optimizer-1.1.0.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py
@@ -3,6 +3,7 @@ import json
  import logging
  import os
  import textwrap
+ import warnings
  from typing import Any, cast
  from collections.abc import Callable

@@ -11,12 +12,10 @@ import opik
  from litellm.caching import Cache
  from litellm.types.caching import LiteLLMCacheType
  from opik import Dataset
- from opik.api_objects import opik_client
  from opik.environment import get_tqdm_for_current_environment
  from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor

  from opik_optimizer import task_evaluator
- from ..utils.core import create_litellm_agent_class

  from .. import _throttle
  from ..base_optimizer import BaseOptimizer, OptimizationRound
@@ -143,6 +142,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  verbose: int = 1,
  enable_context: bool = True,
  n_threads: int = 12,
+ seed: int = 42,
  **model_kwargs: Any,
  ) -> None:
  """
@@ -157,22 +157,28 @@ class MetaPromptOptimizer(BaseOptimizer):
  **model_kwargs: Additional model parameters
  """
  if "project_name" in model_kwargs:
- print(
- "Removing `project_name` from constructor; it now belongs in the ChatPrompt()"
+ warnings.warn(
+ "The 'project_name' parameter in optimizer constructor is deprecated. "
+ "Set project_name in the ChatPrompt instead.",
+ DeprecationWarning,
+ stacklevel=2,
  )
  del model_kwargs["project_name"]

- super().__init__(model=model, verbose=verbose, **model_kwargs)
+ super().__init__(model=model, verbose=verbose, seed=seed, **model_kwargs)
  self.reasoning_model = reasoning_model if reasoning_model is not None else model
  self.rounds = rounds
  self.num_prompts_per_round = num_prompts_per_round
  if num_threads is not None:
- print("num_threads is deprecated; use n_threads instead")
+ warnings.warn(
+ "The 'num_threads' parameter is deprecated and will be removed in a future version. "
+ "Use 'n_threads' instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
  n_threads = num_threads
  self.num_threads = n_threads
  self.dataset: Dataset | None = None
- self._opik_client = opik_client.get_client_cached()
- self.llm_call_counter = 0
  self.enable_context = enable_context
  logger.debug(
  f"Initialized MetaPromptOptimizer with model={model}, reasoning_model={self.reasoning_model}"
@@ -181,6 +187,14 @@ class MetaPromptOptimizer(BaseOptimizer):
  f"Optimization rounds: {rounds}, Prompts/round: {num_prompts_per_round}"
  )

+ def get_optimizer_metadata(self) -> dict[str, Any]:
+ return {
+ "rounds": self.rounds,
+ "num_prompts_per_round": self.num_prompts_per_round,
+ "reasoning_model": self.reasoning_model,
+ "enable_context": self.enable_context,
+ }
+
  @_throttle.rate_limited(_rate_limiter)
  def _call_model(
  self,
@@ -190,7 +204,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  optimization_id: str | None = None,
  ) -> str:
  """Call the model with the given prompt and return the response."""
- self.llm_call_counter += 1
+ self.increment_llm_counter()
  # Note: Basic retry logic could be added here using tenacity
  try:
  # Basic LLM parameters (e.g., temperature, max_tokens)
@@ -321,25 +335,28 @@ class MetaPromptOptimizer(BaseOptimizer):
  subset_size = None # Use all items for final checks
  logger.debug("Using full dataset for evaluation")

- experiment_config = experiment_config or {}
- experiment_config = {
- **experiment_config,
- **{
- "optimizer": self.__class__.__name__,
- "agent_class": self.agent_class.__name__,
- "agent_config": prompt.to_dict(),
- "metric": getattr(metric, "__name__", str(metric)),
- "dataset": dataset.name,
- "configuration": {
- "prompt": prompt.get_messages(),
- "tools": getattr(prompt, "tools", None),
- "n_samples": subset_size,
- "use_full_dataset": use_full_dataset,
- },
- },
- }
- if optimization_id:
- experiment_config["optimization_id"] = optimization_id
+ configuration_updates = self._drop_none(
+ {
+ "n_samples": subset_size,
+ "use_full_dataset": use_full_dataset,
+ }
+ )
+ meta_metadata = self._drop_none(
+ {
+ "optimization_id": optimization_id,
+ "stage": "trial_evaluation" if not use_full_dataset else "final_eval",
+ }
+ )
+ experiment_config = self._prepare_experiment_config(
+ prompt=prompt,
+ dataset=dataset,
+ metric=metric,
+ experiment_config=experiment_config,
+ configuration_updates=configuration_updates,
+ additional_metadata={"meta_prompt": meta_metadata}
+ if meta_metadata
+ else None,
+ )

  def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
  new_prompt = prompt.copy()
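The hand-built experiment_config dict is replaced by BaseOptimizer._prepare_experiment_config plus a _drop_none helper. Neither implementation appears in this diff; the following is a rough sketch of the behavior the call sites above imply, with the helper body being an assumption rather than the library's actual code:

```python
from typing import Any


def _drop_none(values: dict[str, Any]) -> dict[str, Any]:
    """Assumed behavior: keep only the entries whose value is not None."""
    return {key: value for key, value in values.items() if value is not None}


# With the full dataset, subset_size is None, so only use_full_dataset survives.
print(_drop_none({"n_samples": None, "use_full_dataset": True}))
# -> {'use_full_dataset': True}
```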
@@ -357,7 +374,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  )
  raw_model_output = agent.llm_invoke(
  messages=messages,
- seed=None,
+ seed=self.seed,
  allow_tool_use=True,
  )
  except Exception as exc:
@@ -391,7 +408,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  )
  final_response = agent.llm_invoke(
  messages=second_pass_messages,
- seed=None,
+ seed=self.seed,
  allow_tool_use=mcp_config.allow_tool_use_on_second_pass,
  )
  else:
@@ -459,36 +476,25 @@ class MetaPromptOptimizer(BaseOptimizer):
  Optimize a prompt using meta-reasoning.

  Args:
+ prompt: The prompt to optimize
  dataset: The dataset to evaluate against
  metric: The metric to use for evaluation
  experiment_config: A dictionary to log with the experiments
  n_samples: The number of dataset items to use for evaluation
  auto_continue: If True, the algorithm may continue if goal not met
- **kwargs: Additional arguments for evaluation
+ agent_class: Optional agent class to use
+ **kwargs: Additional arguments for evaluation, including:
+ mcp_config (MCPExecutionConfig | None): MCP tool calling configuration (default: None)
+ candidate_generator: Optional candidate generator
+ candidate_generator_kwargs: Optional kwargs for candidate generator

  Returns:
  OptimizationResult: Structured result containing optimization details
  """
- if not isinstance(prompt, chat_prompt.ChatPrompt):
- raise ValueError("Prompt must be a ChatPrompt object")
-
- if not isinstance(dataset, Dataset):
- raise ValueError("Dataset must be a Dataset object")
-
- if not callable(metric):
- raise ValueError(
- "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
- )
-
- if prompt.model is None:
- prompt.model = self.model
- if prompt.model_kwargs is None:
- prompt.model_kwargs = self.model_kwargs
-
- if agent_class is None:
- self.agent_class = create_litellm_agent_class(prompt)
- else:
- self.agent_class = agent_class
+ # Use base class validation and setup methods
+ self.validate_optimization_inputs(prompt, dataset, metric)
+ self.configure_prompt_model(prompt)
+ self.agent_class = self.setup_agent_class(prompt, agent_class)

  total_items = len(dataset.get_items())
  if n_samples is not None and n_samples > total_items:
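The inline type checks deleted above now live in BaseOptimizer.validate_optimization_inputs, which every optimizer shares. The base-class implementation is not part of this diff; reconstructed from the removed lines, the validation amounts to roughly this sketch:

```python
from collections.abc import Callable

from opik import Dataset
from opik_optimizer.optimization_config import chat_prompt


def validate_optimization_inputs(prompt, dataset, metric: Callable) -> None:
    """Sketch of the checks the removed inline code performed (not the actual base-class code)."""
    if not isinstance(prompt, chat_prompt.ChatPrompt):
        raise ValueError("Prompt must be a ChatPrompt object")
    if not isinstance(dataset, Dataset):
        raise ValueError("Dataset must be a Dataset object")
    if not callable(metric):
        raise ValueError(
            "Metric must be a function that takes `dataset_item` and `llm_output` as arguments."
        )
```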
@@ -499,7 +505,7 @@ class MetaPromptOptimizer(BaseOptimizer):

  optimization = None
  try:
- optimization = self._opik_client.create_optimization(
+ optimization = self.opik_client.create_optimization(
  dataset_name=dataset.name,
  objective_name=getattr(metric, "__name__", str(metric)),
  metadata={"optimizer": self.__class__.__name__},
@@ -633,26 +639,25 @@ class MetaPromptOptimizer(BaseOptimizer):
  self.auto_continue = auto_continue
  self.dataset = dataset
  self.prompt = prompt
- self.llm_call_counter = 0 # Reset counter for run
+ self.reset_counters() # Reset counters for run
  initial_prompt = prompt

  current_prompt = prompt
- experiment_config = experiment_config or {}
- experiment_config = {
- **experiment_config,
- **{
- "optimizer": self.__class__.__name__,
- "agent_class": self.agent_class.__name__,
- "agent_config": prompt.to_dict(),
- "metric": getattr(metric, "__name__", str(metric)),
- "dataset": dataset.name,
- "configuration": {
- "prompt": prompt.get_messages(),
- "rounds": self.rounds,
- "num_prompts_per_round": self.num_prompts_per_round,
- },
- },
- }
+ configuration_updates = self._drop_none(
+ {
+ "rounds": self.rounds,
+ "num_prompts_per_round": self.num_prompts_per_round,
+ }
+ )
+ meta_metadata = {"stage": "initial"}
+ experiment_config = self._prepare_experiment_config(
+ prompt=prompt,
+ dataset=dataset,
+ metric=metric,
+ experiment_config=experiment_config,
+ configuration_updates=configuration_updates,
+ additional_metadata={"meta_prompt": meta_metadata},
+ )

  with reporting.display_evaluation(verbose=self.verbose) as baseline_reporter:
  initial_score = self._evaluate_prompt(
@@ -887,6 +892,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  metric_name=getattr(metric, "__name__", str(metric)),
  details=details,
  llm_calls=self.llm_call_counter,
+ tool_calls=self.tool_call_counter,
  dataset_id=dataset_id,
  optimization_id=optimization_id,
  tool_prompts=tool_prompts,
opik_optimizer/mipro_optimizer/_lm.py
@@ -145,9 +145,16 @@ class LM(BaseLM):
  ):
  settings.usage_tracker.add_usage(self.model, dict(results.usage))

- self.llm_call_counter += 1
+ self.increment_llm_counter()
  return results

+ def increment_llm_counter(self) -> None:
+ """Increment the LLM call counter."""
+ self.llm_call_counter += 1
+ parent = getattr(self, "parent_optimizer", None)
+ if parent is not None and hasattr(parent, "increment_llm_counter"):
+ parent.increment_llm_counter()
+
  def launch(self, launch_kwargs: dict[str, Any] | None = None):
  self.provider.launch(self, launch_kwargs)

@@ -302,7 +309,7 @@ def request_cache(maxsize: int | None = None):
  return decorator


- @request_cache(maxsize=None)
+ @request_cache(maxsize=2000)
  def cached_litellm_completion(request: dict[str, Any], num_retries: int):
  return litellm_completion(
  request,
@@ -361,7 +368,7 @@ def litellm_completion(
  return stream_completion()


- @request_cache(maxsize=None)
+ @request_cache(maxsize=2000)
  def cached_litellm_text_completion(request: dict[str, Any], num_retries: int):
  return litellm_text_completion(
  request,
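Replacing maxsize=None with maxsize=2000 turns the unbounded completion caches into bounded LRU caches, so a long optimization run can no longer grow them without limit. The request_cache decorator itself is not shown in this diff; the same idea expressed with the standard library, purely for illustration:

```python
from functools import lru_cache


@lru_cache(maxsize=2000)  # least-recently-used entries are evicted past 2000 distinct requests
def cached_completion(request_key: str) -> str:
    # Stand-in for the actual LiteLLM call, keyed on a hashable request fingerprint.
    return f"completion for {request_key}"
```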
opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py
@@ -79,7 +79,7 @@ class MIPROv2(Teleprompter):
  num_candidates: int = 10,
  num_threads: int | None = None,
  max_errors: int = 10,
- seed: int = 9,
+ seed: int = 42,
  init_temperature: float = 0.5,
  verbose: bool = False,
  track_stats: bool = True,
opik_optimizer/mipro_optimizer/mipro_optimizer.py
@@ -1,7 +1,7 @@
  import os
  import random
  from datetime import datetime
- from typing import Literal
+ from typing import Any, Literal
  from collections.abc import Callable
  import logging

@@ -15,9 +15,9 @@ from opik.integrations.dspy.callback import OpikCallback
  from opik.opik_context import get_current_span_data

  from ..optimization_result import OptimizationResult
- from ..utils import optimization_context
  from ..base_optimizer import BaseOptimizer
  from ..optimization_config.configs import TaskConfig
+ from ..optimization_config import chat_prompt
  from ._lm import LM
  from ._mipro_optimizer_v2 import MIPROv2
  from .utils import (
@@ -45,14 +45,26 @@ class MiproOptimizer(BaseOptimizer):
  super().__init__(model=model, verbose=verbose, **model_kwargs)
  self.tools = []
  self.project_name = project_name
+ if "n_threads" in self.model_kwargs:
+ # To allow compatibility with other optimizers:
+ self.model_kwargs["num_threads"] = self.model_kwargs["n_threads"]
  self.num_threads = self.model_kwargs.pop("num_threads", 6)
  self.model_kwargs["model"] = self.model
  # FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
  self.lm = LM(**self.model_kwargs)
+ setattr(self.lm, "parent_optimizer", self)
  opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
  dspy.configure(lm=self.lm, callbacks=[opik_callback])
  logger.debug(f"Initialized MiproOptimizer with model: {model}")

+ def get_optimizer_metadata(self) -> dict[str, Any]:
+ return self._drop_none(
+ {
+ "project_name": self.project_name,
+ "num_threads": self.num_threads,
+ }
+ )
+
  def evaluate_prompt(
  self,
  dataset: str | Dataset,
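With setattr(self.lm, "parent_optimizer", self), the DSPy LM wrapper forwards every call it counts to the owning optimizer through the increment_llm_counter hook added in _lm.py above. A stripped-down sketch of that delegation with stand-in classes (these are not the library's real classes):

```python
class _Optimizer:
    """Stand-in for the optimizer side: owns the aggregate counter."""

    def __init__(self) -> None:
        self.llm_call_counter = 0

    def increment_llm_counter(self) -> None:
        self.llm_call_counter += 1


class _LM:
    """Stand-in for the LM wrapper: counts locally and notifies its parent."""

    def __init__(self) -> None:
        self.llm_call_counter = 0

    def increment_llm_counter(self) -> None:
        self.llm_call_counter += 1
        parent = getattr(self, "parent_optimizer", None)
        if parent is not None and hasattr(parent, "increment_llm_counter"):
            parent.increment_llm_counter()


optimizer, lm = _Optimizer(), _LM()
setattr(lm, "parent_optimizer", optimizer)
lm.increment_llm_counter()
assert optimizer.llm_call_counter == 1  # the optimizer now sees every LM call
```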
@@ -84,7 +96,7 @@ class MiproOptimizer(BaseOptimizer):
  """
  # FIMXE: call super when it is ready
  # FIXME: Intermediate values:
- self.llm_call_counter += 1
+ self.increment_llm_counter()
  input_key = task_config.input_dataset_fields[0] # FIXME: allow all inputs
  output_key = task_config.output_dataset_field

@@ -239,23 +251,57 @@

  def optimize_prompt(
  self,
+ prompt: chat_prompt.ChatPrompt,
  dataset: str | Dataset,
  metric: Callable,
- task_config: TaskConfig,
- num_candidates: int = 10,
  experiment_config: dict | None = None,
- num_trials: int | None = 3,
  n_samples: int | None = 10,
- auto: Literal["light", "medium", "heavy"] | None = "light",
+ auto_continue: bool = False,
+ agent_class: str | None = None,
  **kwargs,
  ) -> OptimizationResult:
- self._opik_client = opik.Opik()
- with optimization_context(
- client=self._opik_client,
- dataset_name=dataset.name,
- objective_name=metric.__name__,
- metadata={"optimizer": self.__class__.__name__},
- ) as optimization:
+ """
+ Optimize a prompt using MIPRO (Multi-Input Prompt Optimization).
+
+ Args:
+ prompt: The chat prompt to optimize
+ dataset: Opik dataset (or dataset name) containing evaluation data
+ metric: Evaluation function that takes (dataset_item, llm_output) and returns a score
+ experiment_config: Optional configuration for the experiment
+ n_samples: Number of samples to use for optimization (default: 10)
+ auto_continue: Whether to auto-continue optimization (default: False)
+ agent_class: Custom agent class to use (default: None)
+ **kwargs: Additional arguments including:
+ task_config: TaskConfig instance (required)
+ num_candidates: Number of candidates to generate (default: 10)
+ num_trials: Number of trials to run (default: 3)
+ auto: Optimization mode - "light", "medium", or "heavy" (default: "light")
+
+ Returns:
+ OptimizationResult: The optimization result containing the optimized prompt and metrics
+
+ Raises:
+ ValueError: If task_config is not provided
+ """
+ # Resolve dataset names to Dataset objects for validation compatibility
+ if isinstance(dataset, str):
+ dataset_name = dataset
+ client = opik.Opik(project_name=self.project_name)
+ dataset = client.get_dataset(dataset_name)
+
+ # Use base class validation and setup methods
+ self.validate_optimization_inputs(prompt, dataset, metric)
+
+ # Extract MIPRO-specific parameters from kwargs
+ task_config = kwargs.pop("task_config", None)
+ if task_config is None:
+ raise ValueError("task_config is required for MiproOptimizer")
+
+ num_candidates = kwargs.pop("num_candidates", 10)
+ num_trials = kwargs.pop("num_trials", 3)
+ auto = kwargs.pop("auto", "light")
+
+ with self.create_optimization_context(dataset, metric) as optimization:
  result = self._optimize_prompt(
  dataset=dataset,
  metric=metric,
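optimize_prompt now shares the (prompt, dataset, metric, ...) interface of the other optimizers, with the MIPRO-specific knobs moved into **kwargs as described in the docstring above. A hedged call sketch; optimizer, prompt, dataset, and task_config are assumed to have been created beforehand and are not defined by this diff:

```python
def exact_match(dataset_item, llm_output):
    # Placeholder metric: 1.0 when the output equals the reference answer.
    return float(dataset_item["expected_output"] == llm_output)


result = optimizer.optimize_prompt(
    prompt=prompt,            # chat_prompt.ChatPrompt instance
    dataset=dataset,          # opik Dataset, or a dataset name string
    metric=exact_match,
    n_samples=10,
    task_config=task_config,  # required; consumed from **kwargs
    num_trials=3,
    auto="light",
)
```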
@@ -315,19 +361,18 @@
  **kwargs,
  ) -> None:
  # FIXME: Intermediate values:
- self.llm_call_counter = 0
+ self.reset_counters() # Reset counters for run
  prompt = task_config.instruction_prompt
  input_key = task_config.input_dataset_fields[0] # FIXME: allow all
  output_key = task_config.output_dataset_field
  self.tools = task_config.tools
  self.num_candidates = num_candidates
- self.seed = 42
+ self.auto = auto
  self.input_key = input_key
  self.output_key = output_key
  self.prompt = prompt
  self.num_trials = num_trials
  self.n_samples = n_samples
- self.auto = auto

  # Convert to values for MIPRO:
  if isinstance(dataset, str):
@@ -396,6 +441,19 @@
  logger.debug(f"Using DSPy module: {type(self.module).__name__}")
  logger.debug(f"Using metric function: {self.metric_function.__name__}")

+ def cleanup(self) -> None:
+ """
+ Clean up MIPRO-specific resources.
+ """
+ # Call parent cleanup
+ super().cleanup()
+
+ # Clear MIPRO-specific resources
+ self.tools = None
+ self.prompt = None
+
+ logger.debug("Cleaned up MIPRO-specific resources")
+
  def load_from_checkpoint(self, filename):
  """
@@ -516,7 +574,8 @@
  ),
  details={"error": "No candidate programs generated by MIPRO"},
  history=mipro_history_processed,
- llm_calls=self.lm.llm_call_counter,
+ llm_calls=self.llm_call_counter,
+ tool_calls=self.tool_call_counter,
  )

  self.module = self.get_best().details["program"]
@@ -548,7 +607,8 @@
  demonstrations=best_program_details.demonstrations,
  details=best_program_details.details,
  history=mipro_history_processed,
- llm_calls=self.lm.llm_call_counter,
+ llm_calls=self.llm_call_counter,
+ tool_calls=self.tool_call_counter,
  )

  def get_best(self, position: int = 0) -> OptimizationResult:
@@ -556,6 +616,14 @@
  logger.error(
  "get_best() called but no best_programs found. MIPRO compile might have failed or yielded no results."
  )
+ # Get LLM call count from the optimizer if available
+ dspy_llm_calls = (
+ getattr(self.optimizer, "total_calls", 0)
+ if hasattr(self, "optimizer") and self.optimizer
+ else 0
+ )
+ actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
  return OptimizationResult(
  optimizer="MiproOptimizer",
  prompt=[
@@ -574,7 +642,8 @@
  ),
  details={"error": "No programs generated or compile failed"},
  history=[],
- llm_calls=self.lm.llm_call_counter,
+ llm_calls=actual_llm_calls,
+ tool_calls=self.tool_call_counter,
  )

  score = self.best_programs[position]["score"]
@@ -592,6 +661,11 @@
  best_prompt = state["signature"]["instructions"]
  demos = [x.toDict() for x in state["demos"]]

+ # Get LLM call count from the DSPy program module
+ dspy_llm_calls = getattr(program_module, "total_calls", 0)
+ # Use the higher of our counter or DSPy's counter
+ actual_llm_calls = max(self.llm_call_counter, dspy_llm_calls)
+
  print(best_prompt)
  return OptimizationResult(
  optimizer="MiproOptimizer",
@@ -601,5 +675,6 @@
  metric_name=self.opik_metric.__name__,
  demonstrations=demos,
  details={"program": program_module},
- llm_calls=self.lm.llm_call_counter,
+ llm_calls=actual_llm_calls,
+ tool_calls=self.tool_call_counter,
  )
opik_optimizer/optimizable_agent.py
@@ -147,6 +147,11 @@ class OptimizableAgent:
  "content": str(tool_result),
  }
  )
+ # Increment tool call counter if we have access to the optimizer
+ if hasattr(self, "optimizer") and hasattr(
+ self.optimizer, "increment_tool_counter"
+ ):
+ self.optimizer.increment_tool_counter()
  else:
  final_response = msg["content"]
  break
opik_optimizer/optimization_result.py
@@ -27,6 +27,7 @@ class OptimizationResult(pydantic.BaseModel):
  details: dict[str, Any] = pydantic.Field(default_factory=dict)
  history: list[dict[str, Any]] = []
  llm_calls: int | None = None
+ tool_calls: int | None = None

  # MIPRO specific
  demonstrations: list[dict[str, Any]] | None = None
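With the new field, every OptimizationResult can report tool usage alongside LLM usage. A minimal sketch of reading both counters; the result object is assumed to come from a prior optimize_prompt call and is not constructed here:

```python
# result is an OptimizationResult returned by optimize_prompt(...)
print(f"LLM calls during optimization:  {result.llm_calls}")
print(f"Tool calls during optimization: {result.tool_calls}")  # None when tool calls were not tracked
```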