opik-optimizer 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py CHANGED
@@ -828,7 +828,35 @@ Return only the new prompt list object.
  auto_continue: Whether to automatically continue optimization
  **kwargs: Additional keyword arguments
  """
- reporting.display_header(self.__class__.__name__, verbose=self.verbose)
+ if not isinstance(prompt, chat_prompt.ChatPrompt):
+ raise ValueError("Prompt must be a ChatPrompt object")
+
+ if not isinstance(dataset, opik.Dataset):
+ raise ValueError("Dataset must be a Dataset object")
+
+ if not isinstance(metric, Callable):
+ raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
+
+ # Step 0. Start Opik optimization run
+ opik_optimization_run: Optional[optimization.Optimization] = None
+ try:
+ opik_optimization_run: optimization.Optimization = self._opik_client.create_optimization(
+ dataset_name=dataset.name,
+ objective_name=metric.__name__,
+ metadata={"optimizer": self.__class__.__name__},
+ )
+ self._current_optimization_id = opik_optimization_run.id
+ except Exception as e:
+ logger.warning(f"Opik server error: {e}. Continuing without Opik tracking.")
+ self._current_optimization_id = None
+
+ reporting.display_header(
+ algorithm=self.__class__.__name__,
+ optimization_id=self._current_optimization_id,
+ dataset_id=dataset.id,
+ verbose=self.verbose
+ )
+
  reporting.display_configuration(
  prompt.formatted_messages,
  {
@@ -841,9 +869,9 @@ Return only the new prompt list object.
  verbose=self.verbose
  )

+ # Step 1. Step variables and define fitness function
  self.llm_call_counter = 0
  self._history = []
- self._current_optimization_id = None
  self._current_generation = 0
  self._best_fitness_history = []
  self._generations_without_improvement = 0
@@ -851,7 +879,6 @@ Return only the new prompt list object.
  self._current_population = []
  self._generations_without_overall_improvement = 0

- # Step 0. Define fitness function
  if self.enable_moo:
  def _deap_evaluate_individual_fitness(
  messages: List[Dict[str, str]]
@@ -884,19 +911,6 @@ Return only the new prompt list object.
  return (fitness_score,)
  self.toolbox.register("evaluate", _deap_evaluate_individual_fitness)

- # Step 1. Start Opik optimization run
- opik_optimization_run: Optional[optimization.Optimization] = None
- try:
- opik_optimization_run: optimization.Optimization = self._opik_client.create_optimization(
- dataset_name=dataset.name,
- objective_name=metric.__name__,
- metadata={"optimizer": self.__class__.__name__},
- )
- self._current_optimization_id = opik_optimization_run.id
- logger.info(f"Created Opik Optimization run with ID: {self._current_optimization_id}")
- except Exception as e:
- logger.warning(f"Opik server error: {e}. Continuing without Opik tracking.")
-
  # Step 2. Compute the initial performance of the prompt
  with reporting.baseline_performance(verbose=self.verbose) as report_baseline_performance:
  initial_eval_result: Tuple[float, float] | Tuple[float, ] = _deap_evaluate_individual_fitness(prompt.formatted_messages)
@@ -976,7 +990,7 @@ Return only the new prompt list object.
  best_prompt=best_prompt_overall,
  best_score=best_primary_score_overall,
  improvement=0.0
- ).dict()
+ ).model_dump()
  self._add_to_history(initial_round_data)

  with reporting.start_evolutionary_algo(verbose=self.verbose) as report_evolutionary_algo:
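The `.dict()` to `.model_dump()` migration above tracks Pydantic v2, where `BaseModel.dict()` is deprecated in favor of `model_dump()`. A minimal sketch of the rename; the `RoundData` model is a hypothetical stand-in for the round payload serialized here:

    import pydantic

    class RoundData(pydantic.BaseModel):  # hypothetical stand-in for the history payload
        best_score: float
        improvement: float = 0.0

    round_data = RoundData(best_score=0.82)
    payload = round_data.model_dump()  # Pydantic v2 spelling; .dict() still works but warns
    assert payload == {"best_score": 0.82, "improvement": 0.0}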
@@ -1035,7 +1049,7 @@ Return only the new prompt list object.
  best_prompt=best_prompt_overall,
  best_score=best_primary_score_overall,
  improvement=(best_primary_score_overall - initial_primary_score) / abs(initial_primary_score) if initial_primary_score and initial_primary_score != 0 else (1.0 if best_primary_score_overall > 0 else 0.0)
- ).dict()
+ ).model_dump()
  self._add_to_history(gen_round_data)

  stopped_early_flag = self._generations_without_overall_improvement >= self.DEFAULT_EARLY_STOPPING_GENERATIONS
@@ -1101,6 +1115,7 @@ Return only the new prompt list object.
  # Add final details
  final_details.update({
  "total_generations_run": generation_idx + 1,
+ "num_generations": self.num_generations,
  "population_size": self.population_size,
  "mutation_probability": self.mutation_rate,
  "crossover_probability": self.crossover_rate,
@@ -1132,7 +1147,9 @@ Return only the new prompt list object.
  return OptimizationResult(
  optimizer=self.__class__.__name__,
  prompt=final_best_prompt.formatted_messages,
- score=final_primary_score,
+ score=final_primary_score,
+ initial_prompt=prompt.formatted_messages,
+ initial_score=initial_primary_score,
  metric_name=metric.__name__,
  details=final_details,
  history=self.get_history(),
@@ -1186,6 +1203,7 @@ Return only the new prompt list object.
  response = litellm.completion(
  model=self.model, messages=messages, **final_call_params
  )
+ self.llm_call_counter += 1

  logger.debug(f"Response: {response}")
  return response.choices[0].message.content
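Moving run creation to the top of `optimize_prompt` follows a graceful-degradation pattern: when the Opik server cannot create an optimization run, the optimizer logs a warning and continues untracked instead of aborting. A minimal sketch of the same pattern, with `client` as a hypothetical stand-in for `self._opik_client`:

    import logging

    logger = logging.getLogger(__name__)

    def start_tracked_run(client, dataset_name: str, objective_name: str):
        """Return the new run's id, or None when tracking is unavailable."""
        try:
            run = client.create_optimization(
                dataset_name=dataset_name,
                objective_name=objective_name,
            )
            return run.id
        except Exception as exc:  # any server or transport error degrades to untracked mode
            logger.warning(f"Opik server error: {exc}. Continuing without Opik tracking.")
            return None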
opik_optimizer/evolutionary_optimizer/reporting.py CHANGED
@@ -2,8 +2,6 @@ from contextlib import contextmanager
  from io import StringIO
  from typing import List

- import rich
- from rich.console import Console
  from rich.panel import Panel
  from rich.text import Text

opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py CHANGED
@@ -1,3 +1,4 @@
+ import copy
  import json
  import logging
  import random
@@ -194,6 +195,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):

  def _run_optimization(
  self,
+ initial_prompt: chat_prompt.ChatPrompt,
  fewshot_prompt_template: FewShotPromptTemplate,
  dataset: Dataset,
  metric: Callable,
@@ -249,13 +251,14 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  for key, value in example.items():
  processed_example[key] = str(value)

- try:
- processed_demo_examples.append(
- fewshot_prompt_template.example_template.format(**processed_example)
- )
- except Exception:
- logger.error(f"Failed to format fewshot prompt template {fewshot_prompt_template} with example: {processed_example} ")
- raise
+ processed_demo_example=fewshot_prompt_template.example_template
+ for key, value in processed_example.items():
+ try:
+ processed_demo_example=processed_demo_example.replace(f"{{{key}}}", str(value))
+ except Exception:
+ logger.error(f"Failed to format fewshot prompt template {fewshot_prompt_template} with example: {processed_example} ")
+ raise
+ processed_demo_examples.append(processed_demo_example)
  few_shot_examples = "\n\n".join(processed_demo_examples)

  llm_task = self._build_task_from_messages(
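Replacing `str.format(**processed_example)` with per-key `str.replace` makes rendering robust for example templates that contain literal braces (for instance JSON snippets), which `str.format` tries to parse as replacement fields. A small illustration with a hypothetical template:

    template = 'Answer as JSON: {"answer": "{answer}"}'
    example = {"answer": "42"}

    try:
        template.format(**example)  # the literal {"answer": ...} braces are parsed as a field
    except KeyError as exc:
        print("format() failed on:", exc)

    rendered = template
    for key, value in example.items():  # targeted replacement leaves other braces untouched
        rendered = rendered.replace(f"{{{key}}}", str(value))
    print(rendered)  # Answer as JSON: {"answer": "42"}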
@@ -364,6 +367,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  best_score = best_trial.value
  best_example_indices = best_trial.user_attrs.get("example_indices", [])

+ if best_score <= baseline_score:
+ best_score = baseline_score
+ best_prompt = initial_prompt.formatted_messages
+ else:
+ best_prompt = best_trial.user_attrs["config"]["message_list"]
+
  reporting.display_result(
  initial_score=baseline_score,
  best_score=best_score,
@@ -374,9 +383,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  return optimization_result.OptimizationResult(
  optimizer=self.__class__.__name__,
  prompt=best_trial.user_attrs["config"]["message_list"],
+ initial_prompt=initial_prompt.formatted_messages,
+ initial_score=baseline_score,
  score=best_score,
  metric_name=metric.__name__,
  details={
+ "initial_score": baseline_score,
  "chat_messages": best_trial.user_attrs["config"]["message_list"],
  "prompt_parameter": best_trial.user_attrs["config"],
  #"n_examples": best_n_examples,
@@ -414,6 +426,16 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  Returns:
  OptimizationResult: Result of the optimization
  """
+ if not isinstance(prompt, chat_prompt.ChatPrompt):
+ raise ValueError("Prompt must be a ChatPrompt object")
+
+ if not isinstance(dataset, Dataset):
+ raise ValueError("Dataset must be a Dataset object")
+
+ if not isinstance(metric, Callable):
+ raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
+
+
  optimization = None
  try:
  optimization = self._opik_client.create_optimization(
@@ -421,15 +443,22 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  objective_name=metric.__name__,
  metadata={"optimizer": self.__class__.__name__},
  )
+ optimization_run_id = optimization.id
  except Exception:
  logger.warning(
  "Opik server does not support optimizations. Please upgrade opik."
  )
  optimization = None
+ optimization_run_id = None

  try:
  # Start experiment reporting
- reporting.display_header("Few-Shot Bayesian Optimizer", verbose=self.verbose)
+ reporting.display_header(
+ algorithm=self.__class__.__name__,
+ optimization_id=optimization_run_id,
+ dataset_id=dataset.id,
+ verbose=self.verbose
+ )
  reporting.display_configuration(
  prompt.formatted_messages,
  optimizer_config={
@@ -468,6 +497,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):

  # Step 3. Start the optimization process
  result = self._run_optimization(
+ initial_prompt=prompt,
  fewshot_prompt_template=fewshot_template,
  dataset=dataset,
  metric=metric,
@@ -563,18 +593,15 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
  self, messages: List[Dict[str, str]], few_shot_examples: Optional[str] = None
  ):
  def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, Any]:
+ prompt_ = copy.deepcopy(messages)
  for key, value in dataset_item.items():
- prompt_ = [{
- "role": item["role"],
- "content": item["content"].replace("{" + key + "}", str(value))
- } for item in messages]
+ for item in prompt_:
+ item["content"] = item["content"].replace("{" + key + "}", str(value))

  if few_shot_examples:
- prompt_ = [{
- "role": item["role"],
- "content": item["content"].replace(FEW_SHOT_EXAMPLE_PLACEHOLDER, few_shot_examples)
- } for item in prompt_]
-
+ for item in prompt_:
+ item["content"] = item["content"].replace(FEW_SHOT_EXAMPLE_PLACEHOLDER, few_shot_examples)
+
  response = self._call_model(
  model=self.model,
  messages=prompt_,
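The rewritten `llm_task` also fixes a substitution bug: the old list comprehension rebuilt `prompt_` from the pristine `messages` on every dataset key, so only the last key's placeholder survived. The new version deep-copies once and folds every substitution into the same copy. A minimal demonstration with hypothetical messages:

    import copy

    messages = [{"role": "user", "content": "Q: {question} Context: {context}"}]
    item = {"question": "2+2?", "context": "arithmetic"}

    # Old pattern: rebuilding from `messages` each iteration keeps only the last key.
    for key, value in item.items():
        buggy = [{"role": m["role"], "content": m["content"].replace(f"{{{key}}}", str(value))}
                 for m in messages]
    print(buggy[0]["content"])  # "Q: {question} Context: arithmetic" - one placeholder left

    # 0.9.2 pattern: copy once, then substitute every key into the same copy.
    prompt_ = copy.deepcopy(messages)  # deepcopy so the original template is never mutated
    for key, value in item.items():
        for m in prompt_:
            m["content"] = m["content"].replace(f"{{{key}}}", str(value))
    print(prompt_[0]["content"])  # "Q: 2+2? Context: arithmetic"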
opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py CHANGED
@@ -1,4 +1,5 @@
  import json
+ import copy
  import logging
  import os
  from typing import Any, Callable, Dict, List, Optional, overload
@@ -255,7 +256,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  **experiment_config,
  **{
  "optimizer": self.__class__.__name__,
- "metric": metric.__name__,
+ "metric": getattr(metric, '__name__', str(metric)),
  "dataset": dataset.name,
  "configuration": {
  "prompt": prompt.formatted_messages,
@@ -300,7 +301,7 @@ class MetaPromptOptimizer(BaseOptimizer):

  # Use dataset's get_items with limit for sampling
  logger.debug(
- f"Starting evaluation with {subset_size if subset_size else 'all'} samples for metric: {metric.__name__}"
+ f"Starting evaluation with {subset_size if subset_size else 'all'} samples for metric: {getattr(metric, '__name__', str(metric))}"
  )
  score = task_evaluator.evaluate(
  dataset=dataset,
@@ -341,8 +342,15 @@ class MetaPromptOptimizer(BaseOptimizer):
  Returns:
  OptimizationResult: Structured result containing optimization details
  """
- reporting.display_header(self.__class__.__name__, verbose=self.verbose)
+ if not isinstance(prompt, chat_prompt.ChatPrompt):
+ raise ValueError("Prompt must be a ChatPrompt object")

+ if not isinstance(dataset, Dataset):
+ raise ValueError("Dataset must be a Dataset object")
+
+ if not isinstance(metric, Callable):
+ raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
+
  total_items = len(dataset.get_items())
  if n_samples is not None and n_samples > total_items:
  logger.warning(
@@ -350,21 +358,12 @@ class MetaPromptOptimizer(BaseOptimizer):
  )
  n_samples = None

- reporting.display_configuration(
- messages=prompt.formatted_messages,
- optimizer_config={
- "optimizer": self.__class__.__name__,
- "n_samples": n_samples,
- "auto_continue": auto_continue
- },
- verbose=self.verbose
- )
-
+
  optimization = None
  try:
  optimization = self._opik_client.create_optimization(
  dataset_name=dataset.name,
- objective_name=metric.__name__,
+ objective_name=getattr(metric, '__name__', str(metric)),
  metadata={"optimizer": self.__class__.__name__},
  )
  logger.debug(f"Created optimization with ID: {optimization.id}")
@@ -374,6 +373,22 @@ class MetaPromptOptimizer(BaseOptimizer):
  )
  optimization = None

+ reporting.display_header(
+ algorithm=self.__class__.__name__,
+ optimization_id=optimization.id if optimization is not None else None,
+ dataset_id=dataset.id,
+ verbose=self.verbose
+ )
+ reporting.display_configuration(
+ messages=prompt.formatted_messages,
+ optimizer_config={
+ "optimizer": self.__class__.__name__,
+ "n_samples": n_samples,
+ "auto_continue": auto_continue
+ },
+ verbose=self.verbose
+ )
+
  try:
  result = self._optimize_prompt(
  optimization_id=optimization.id if optimization is not None else None,
@@ -411,6 +426,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  self.dataset = dataset
  self.prompt = prompt
  self.llm_call_counter = 0 # Reset counter for run
+ initial_prompt: List[Dict[str, str]] = prompt.formatted_messages

  current_prompt = prompt.formatted_messages
  experiment_config = experiment_config or {}
@@ -418,7 +434,7 @@ class MetaPromptOptimizer(BaseOptimizer):
  **experiment_config,
  **{
  "optimizer": self.__class__.__name__,
- "metric": metric.__name__,
+ "metric": getattr(metric, '__name__', str(metric)),
  "dataset": self.dataset.name,
  "configuration": {
  "prompt": current_prompt,
@@ -527,11 +543,11 @@ class MetaPromptOptimizer(BaseOptimizer):

  return self._create_result(
  metric,
- prompt,
- best_prompt,
- best_score,
- initial_score,
- rounds,
+ initial_prompt=initial_prompt,
+ best_prompt=best_prompt,
+ best_score=best_score,
+ initial_score=initial_score,
+ rounds=rounds,
  )

  def _calculate_improvement(
@@ -581,21 +597,19 @@ class MetaPromptOptimizer(BaseOptimizer):
  def _create_result(
  self,
  metric: Callable,
- prompt: chat_prompt.ChatPrompt,
- best_prompt: str,
+ initial_prompt: List[Dict[str, str]],
+ best_prompt: List[Dict[str, str]],
  best_score: float,
  initial_score: float,
  rounds: List[OptimizationRound],
  ) -> OptimizationResult:
  """Create the final OptimizationResult object."""
  details = {
- "initial_prompt": prompt,
- "initial_score": initial_score,
  "final_prompt": best_prompt,
  "final_score": best_score,
  "rounds": rounds,
  "total_rounds": len(rounds),
- "metric_name": metric.__name__,
+ "metric_name": getattr(metric, '__name__', str(metric)),
  "model": self.model,
  "temperature": self.model_kwargs.get("temperature"),
  }
@@ -604,7 +618,9 @@ class MetaPromptOptimizer(BaseOptimizer):
  optimizer=self.__class__.__name__,
  prompt=best_prompt,
  score=best_score,
- metric_name=metric.__name__,
+ initial_prompt=initial_prompt,
+ initial_score=initial_score,
+ metric_name=getattr(metric, '__name__', str(metric)),
  details=details,
  llm_calls=self.llm_call_counter
  )
opik_optimizer/meta_prompt_optimizer/reporting.py CHANGED
@@ -1,6 +1,5 @@
  from contextlib import contextmanager

- import rich
  from rich.text import Text

  from ..reporting_utils import (
opik_optimizer/mipro_optimizer/_lm.py CHANGED
@@ -82,6 +82,7 @@ class LM(BaseLM):
  self.finetuning_model = finetuning_model
  self.launch_kwargs = launch_kwargs or {}
  self.train_kwargs = train_kwargs or {}
+ self.llm_call_counter = 0

  # Handle model-specific configuration for different model families
  model_family = model.split("/")[-1].lower() if "/" in model else model.lower()
@@ -129,6 +130,7 @@ class LM(BaseLM):
  if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
  settings.usage_tracker.add_usage(self.model, dict(results.usage))

+ self.llm_call_counter += 1
  return results

  def launch(self, launch_kwargs: Optional[Dict[str, Any]] = None):
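Tallying calls on the LM wrapper itself (instead of on the optimizer, as in 0.9.1) means every completion routed through the shared dspy LM is counted, including ones DSPy issues internally. A minimal sketch of the pattern, with a hypothetical base class standing in for dspy's `BaseLM`:

    class BaseLM:  # hypothetical stand-in for dspy's BaseLM
        def forward(self, prompt: str) -> str:
            return f"echo: {prompt}"

    class CountingLM(BaseLM):
        """LM wrapper that tallies every completion it serves."""

        def __init__(self) -> None:
            self.llm_call_counter = 0

        def forward(self, prompt: str) -> str:
            results = super().forward(prompt)
            self.llm_call_counter += 1  # one shared tally for every caller of this LM
            return results

    lm = CountingLM()
    lm.forward("hello")
    lm.forward("world")
    print(lm.llm_call_counter)  # 2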
@@ -323,6 +325,7 @@ def litellm_completion(request: Dict[str, Any], num_retries: int, cache={"no-cac
  **retry_kwargs,
  **request,
  )
+
  chunks = []
  async for chunk in response:
  if caller_predict_id:
opik_optimizer/mipro_optimizer/mipro_optimizer.py CHANGED
@@ -42,11 +42,10 @@ class MiproOptimizer(BaseOptimizer):
  self.tools = []
  self.num_threads = self.model_kwargs.pop("num_threads", 6)
  self.model_kwargs["model"] = self.model
- self.llm_call_counter = 0
  # FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
- lm = LM(**self.model_kwargs)
+ self.lm = LM(**self.model_kwargs)
  opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
- dspy.configure(lm=lm, callbacks=[opik_callback])
+ dspy.configure(lm=self.lm, callbacks=[opik_callback])
  logger.debug(f"Initialized MiproOptimizer with model: {model}")

  def evaluate_prompt(
@@ -54,7 +53,7 @@ class MiproOptimizer(BaseOptimizer):
  dataset: Union[str, Dataset],
  metric: Callable,
  task_config: TaskConfig,
- prompt: Union[str, dspy.Module, OptimizationResult] = None,
+ prompt: Optional[Union[str, dspy.Module, OptimizationResult]] = None,
  n_samples: int = 10,
  dataset_item_ids: Optional[List[str]] = None,
  experiment_config: Optional[Dict] = None,
@@ -463,7 +462,7 @@ class MiproOptimizer(BaseOptimizer):
  metric_name=self.opik_metric.__name__ if hasattr(self, 'opik_metric') else "unknown_metric",
  details={"error": "No candidate programs generated by MIPRO"},
  history=mipro_history_processed,
- llm_calls=self.llm_call_counter
+ llm_calls=self.lm.llm_call_counter
  )

  self.module = self.get_best().details["program"]
@@ -488,7 +487,7 @@ class MiproOptimizer(BaseOptimizer):
  demonstrations=best_program_details.demonstrations,
  details=best_program_details.details,
  history=mipro_history_processed,
- llm_calls=self.llm_call_counter
+ llm_calls=self.lm.llm_call_counter
  )

  def get_best(self, position: int = 0) -> OptimizationResult:
@@ -501,7 +500,7 @@ class MiproOptimizer(BaseOptimizer):
  metric_name=getattr(self, 'opik_metric', None).name if hasattr(self, 'opik_metric') and self.opik_metric else "unknown_metric",
  details={"error": "No programs generated or compile failed"},
  history=[],
- llm_calls=self.llm_call_counter
+ llm_calls=self.lm.llm_call_counter
  )

  score = self.best_programs[position]["score"]
@@ -528,5 +527,5 @@ class MiproOptimizer(BaseOptimizer):
  metric_name=self.opik_metric.__name__,
  demonstrations=demos,
  details={"program": program_module},
- llm_calls=self.llm_call_counter
+ llm_calls=self.lm.llm_call_counter
  )
opik_optimizer/optimization_result.py CHANGED
@@ -17,6 +17,10 @@ class OptimizationResult(pydantic.BaseModel):
  score: float
  metric_name: str

+ # Initial score
+ initial_prompt: Optional[List[Dict[Literal["role", "content"], str]]] = None
+ initial_score: Optional[float] = None
+
  details: Dict[str, Any] = pydantic.Field(default_factory=dict)
  history: List[Dict[str, Any]] = []
  llm_calls: Optional[int] = None
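With `initial_prompt` and `initial_score` promoted to first-class fields, callers no longer need to smuggle the baseline through `details`, and `_calculate_improvement_str` below reads `self.initial_score` directly. A sketch of constructing a result with the new fields (values are illustrative):

    result = OptimizationResult(
        optimizer="MetaPromptOptimizer",
        prompt=[{"role": "system", "content": "You are a concise assistant."}],
        score=0.85,
        metric_name="accuracy",
        initial_prompt=[{"role": "system", "content": "You are an assistant."}],
        initial_score=0.70,
    )
    # The improvement string is now derived from the field, not details["initial_score"].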
@@ -33,7 +37,7 @@ class OptimizationResult(pydantic.BaseModel):

  def _calculate_improvement_str(self) -> str:
  """Helper to calculate improvement percentage string."""
- initial_s = self.details.get("initial_score")
+ initial_s = self.initial_score
  final_s = self.score

  # Check if initial score exists and is a number
@@ -60,7 +64,7 @@ class OptimizationResult(pydantic.BaseModel):
  """Provides a clean, well-formatted plain-text summary."""
  separator = "=" * 80
  rounds_ran = len(self.details.get("rounds", []))
- initial_score = self.details.get("initial_score")
+ initial_score = self.initial_score
  initial_score_str = (
  f"{initial_score:.4f}" if isinstance(initial_score, (int, float)) else "N/A"
  )
@@ -74,7 +78,6 @@ class OptimizationResult(pydantic.BaseModel):
  .replace("[dim]", "")
  .replace("[/dim]", "")
  )
- stopped_early = self.details.get("stopped_early", "N/A")

  model_name = self.details.get("model", "N/A")
  temp = self.details.get("temperature")
@@ -101,7 +104,6 @@ class OptimizationResult(pydantic.BaseModel):
  f"Final Best Score: {final_score_str}",
  f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
  f"Rounds Completed: {rounds_ran}",
- f"Stopped Early: {stopped_early}",
  "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
  "--------------------------------------------------------------------------------",
  f"{final_prompt_display}",
@@ -114,7 +116,7 @@ class OptimizationResult(pydantic.BaseModel):
  """Provides a rich, formatted output for terminals supporting Rich."""
  improvement_str = self._calculate_improvement_str()
  rounds_ran = len(self.details.get("rounds", []))
- initial_score = self.details.get("initial_score")
+ initial_score = self.initial_score
  initial_score_str = (
  f"{initial_score:.4f}"
  if isinstance(initial_score, (int, float))
@@ -124,8 +126,6 @@ class OptimizationResult(pydantic.BaseModel):
  stopped_early = self.details.get("stopped_early", "N/A")

  model_name = self.details.get("model", "[dim]N/A[/dim]")
- temp = self.details.get("temperature")
- temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "[dim]N/A[/dim]"

  table = rich.table.Table.grid(padding=(0, 1))
  table.add_column(style="dim")
@@ -135,13 +135,12 @@ class OptimizationResult(pydantic.BaseModel):
  "Optimizer:",
  f"[bold]{self.optimizer}[/bold]",
  )
- table.add_row("Model Used:", f"{model_name} ([dim]Temp:[/dim] {temp_str})")
+ table.add_row("Model Used:", f"{model_name}")
  table.add_row("Metric Evaluated:", f"[bold]{self.metric_name}[/bold]")
  table.add_row("Initial Score:", initial_score_str)
  table.add_row("Final Best Score:", f"[bold cyan]{final_score_str}[/bold cyan]")
  table.add_row("Total Improvement:", improvement_str)
  table.add_row("Rounds Completed:", str(rounds_ran))
- table.add_row("Stopped Early:", str(stopped_early))

  # Display Chat Structure if available
  panel_title = "[bold]Final Optimized Prompt[/bold]"
opik_optimizer/reporting_utils.py CHANGED
@@ -2,13 +2,14 @@ import logging
  from contextlib import contextmanager
  from typing import Dict, List, Optional

- import rich
  from rich import box
  from rich.console import Console, Group
  from rich.panel import Panel
  from rich.progress import track
  from rich.text import Text

+ from .utils import get_optimization_run_url_by_id
+
  PANEL_WIDTH = 70

  def get_console(*args, **kwargs):
@@ -21,10 +22,8 @@ def convert_tqdm_to_rich(description: Optional[str] = None, verbose: int = 1):
  """Context manager to convert tqdm to rich."""
  import opik.evaluation.engine.evaluation_tasks_executor

- optimizer_logger = logging.getLogger('opik_optimizer')
-
  def _tqdm_to_track(iterable, desc, disable, total):
- disable = verbose == 0 or optimizer_logger.level > logging.INFO
+ disable = verbose == 0
  return track(
  iterable,
  description=description or desc,
@@ -91,16 +90,36 @@ def display_messages(messages: List[Dict[str, str]], prefix: str = ""):
  for line in rendered_panel.splitlines():
  console.print(Text(prefix) + Text.from_ansi(line))

- def display_header(algorithm: str, verbose: int = 1):
+ def display_header(
+ algorithm: str,
+ optimization_id: Optional[str]=None,
+ dataset_id: Optional[str]=None,
+ verbose: int = 1
+ ):
  if verbose < 1:
  return

+ if optimization_id is not None and dataset_id is not None:
+ optimization_url = get_optimization_run_url_by_id(
+ optimization_id=optimization_id,
+ dataset_id=dataset_id
+ )
+
+ # Create a visually appealing panel with an icon and ensure link doesn't wrap
+
+ link_text = Text("-> View optimization details in your Opik dashboard")
+ link_text.stylize(f"link {optimization_url}", 28, len(link_text))
+ else:
+ link_text = Text("No optimization run link available", style="dim")
+
  content = Text.assemble(
  ("● ", "green"),
  "Running Opik Evaluation - ",
- (algorithm, "blue")
- )
+ (algorithm, "blue"),
+ "\n\n"
+ ).append(link_text)

+
  panel = Panel(
  content,
  box=box.ROUNDED,
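The dashboard link relies on Rich hyperlink styling: `Text.stylize(f"link {url}", start, end)` marks a character range as a clickable OSC 8 hyperlink in terminals that support it (offset 28 skips the arrow prefix so only the tail of the sentence is linked). A standalone sketch with a hypothetical URL:

    from rich.console import Console
    from rich.text import Text

    url = "https://example.com/opik/run"  # hypothetical optimization run URL
    text = Text("-> View optimization details in your Opik dashboard")
    # Terminals without OSC 8 hyperlink support simply render the plain text.
    text.stylize(f"link {url}", 28, len(text))
    Console().print(text)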
@@ -126,7 +145,7 @@ def display_result(initial_score, best_score, best_prompt, verbose: int = 1):
  perc_change = (best_score - initial_score) / initial_score
  content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})", style="bold green")]
  else:
- content = [Text("Optimization trial did not find a better prompt than the initial one.", style="bold red")]
+ content = [Text(f"Optimization run did not find a better prompt than the initial one.\nScore: {best_score:.4f}", style="dim bold red")]

  content.append(Text("\nOptimized prompt:"))
  for i, msg in enumerate(best_prompt):
opik_optimizer/utils.py CHANGED
@@ -1,23 +1,17 @@
  """Utility functions and constants for the optimizer package."""

- from typing import Dict, Any, Optional, TYPE_CHECKING, Type, Literal, Final
- from types import TracebackType
-
- import opik
- from opik.api_objects.opik_client import Opik
- from opik.api_objects.optimization import Optimization
-
+ import base64
  import json
  import logging
  import random
  import string
- import base64
  import urllib.parse
- from rich import console
+ from types import TracebackType
+ from typing import Any, Dict, Final, Literal, Optional, Type

- # Type hint for OptimizationResult without circular import
- if TYPE_CHECKING:
- from .optimization_result import OptimizationResult
+ import opik
+ from opik.api_objects.opik_client import Opik
+ from opik.api_objects.optimization import Optimization

  ALLOWED_URL_CHARACTERS: Final[str] = ":/&?="
  logger = logging.getLogger(__name__)
@@ -63,6 +57,7 @@ class OptimizationContextManager:
  name=self.name,
  metadata=self.metadata,
  )
+
  if self.optimization:
  return self.optimization
  else:
@@ -238,8 +233,10 @@ def ensure_ending_slash(url: str) -> str:


  def get_optimization_run_url_by_id(
- dataset_id: str, optimization_id: str, url_override: str
+ dataset_id: str, optimization_id: str
  ) -> str:
+ opik_config = opik.config.get_from_user_inputs()
+ url_override = opik_config.url_override
  encoded_opik_url = base64.b64encode(url_override.encode("utf-8")).decode("utf-8")

  run_path = urllib.parse.quote(
@@ -247,18 +244,3 @@ def ensure_ending_slash(url: str) -> str:
  safe=ALLOWED_URL_CHARACTERS,
  )
  return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
-
-
- def display_optimization_run_link(
- optimization_id: str, dataset_id: str, url_override: str
- ) -> None:
- console_container = console.Console()
-
- optimization_url = get_optimization_run_url_by_id(
- optimization_id=optimization_id,
- dataset_id=dataset_id,
- url_override=url_override,
- )
- console_container.print(
- f"View the optimization run [link={optimization_url}]in your Opik dashboard[/link]."
- )
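`get_optimization_run_url_by_id` now resolves `url_override` from the user's Opik configuration rather than taking it as a parameter, and base64-encodes the frontend URL into the run path before quoting it. A rough sketch of the shape of that construction; the path template below is illustrative, not the package's actual one:

    import base64
    import urllib.parse

    ALLOWED_URL_CHARACTERS = ":/&?="

    def build_run_url(url_override: str, dataset_id: str, optimization_id: str) -> str:
        # The frontend expects the workspace URL base64-encoded inside the path.
        encoded_opik_url = base64.b64encode(url_override.encode("utf-8")).decode("utf-8")
        run_path = urllib.parse.quote(
            f"optimizations/{dataset_id}/{optimization_id}?from={encoded_opik_url}",  # illustrative
            safe=ALLOWED_URL_CHARACTERS,
        )
        base = url_override if url_override.endswith("/") else url_override + "/"
        return urllib.parse.urljoin(base, run_path)

    print(build_run_url("https://www.comet.com/opik/api", "dataset-123", "opt-456"))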
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: opik_optimizer
- Version: 0.9.1
+ Version: 0.9.2
  Summary: Agent optimization with Opik
  Home-page: https://github.com/comet-ml/opik
  Author: Comet ML
@@ -3,10 +3,10 @@ opik_optimizer/_throttle.py,sha256=ztub8qlwz4u0GVA2TIoLig0D1Cs0hJ7_o_SnT_C7Nmk,1
  opik_optimizer/base_optimizer.py,sha256=mg5D5m2hIzq3XbVjRhx8c_HuXWZWaRE2J6QtkHnxkqE,4439
  opik_optimizer/cache_config.py,sha256=EzF4RAzxhSG8vtMJANdiUpNHQ9HzL2CrCXp0iik0f4A,580
  opik_optimizer/logging_config.py,sha256=XECPnSoh8ghbllv1F0vj6ofO8YmE2HL0coLWjLdaNTU,2780
- opik_optimizer/optimization_result.py,sha256=PeDIoNIHaJsi9WsDoKPgO6sW3CkPKZKK7RvY8tmjUN0,7508
- opik_optimizer/reporting_utils.py,sha256=pRcRhE9w1q6PVdTmqrTyqIlUmeMAknwJJFT99FG0tuk,5523
+ opik_optimizer/optimization_result.py,sha256=p_vHW5NtdeJHJ9KpmbM8JMd3CqG_wd02XzClRL7mMgs,7270
+ opik_optimizer/reporting_utils.py,sha256=dL1u8Wpo7S9H480FxDENsxFe301PdTqHcXbFNqqXdik,6175
  opik_optimizer/task_evaluator.py,sha256=212shzapI7GtrqrFvn_6wkvWR5_Zhm30CR5t-tW0bV8,4380
- opik_optimizer/utils.py,sha256=y7I58vESRphuMLA4cfH3sNiSuntyiqG8hwK5UuwVdt4,8239
+ opik_optimizer/utils.py,sha256=mf-773TJH5Cm6cQXcsntEOQ-6WEaYUaILIonuaEErmo,7707
  opik_optimizer/data/hotpot-500.json,sha256=YXxCtuvYvxSu5u0y4559a6b1qwgAYsWzT_SUKv_21ew,76862
  opik_optimizer/datasets/__init__.py,sha256=V4LVDOaRjwzaYvhdQ3V6CAwFaeKnxyTV1lp_ES9Z31E,691
  opik_optimizer/datasets/ai2_arc.py,sha256=PMWInWVRPQ9u_nlr9N531CeVKjI6y_ZSQmNY2t1zwOI,1401
@@ -24,25 +24,25 @@ opik_optimizer/demo/__init__.py,sha256=KSpFYhzN7fTmLEsIaciRHwxcJDeAiX5NDmYLdPsfp
  opik_optimizer/demo/cache.py,sha256=5WqK8rSiijzU6s4VHIjLuL1LR5i1yHtY-x5FZTduSus,3669
  opik_optimizer/demo/datasets.py,sha256=MezQlG4Q_cgSH7zQOmJcDwkGU8JV0xKSnZwCJGaj-88,2494
  opik_optimizer/evolutionary_optimizer/__init__.py,sha256=OQ2ART5g-7EVGOISvTGY-AbmEyyDFEJJCsmJBzGJIpw,57
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py,sha256=cnpga8ytSjx5lNq2URLBCyV9s5r1s9_eKD4BU7rWW_g,76259
- opik_optimizer/evolutionary_optimizer/reporting.py,sha256=Gl52sH7XaU4GXUhFt_FcfjYFN3MghnDt1ISULATjbP4,9944
+ opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py,sha256=tZLID_N4auZ7v7MWZesr9xbzRMudznAUQ8Q9XzhbqRY,76948
+ opik_optimizer/evolutionary_optimizer/reporting.py,sha256=w5fWIA9F30a25E5BpPxKLubNitcXmU5KzeBdS86Ajao,9899
  opik_optimizer/few_shot_bayesian_optimizer/__init__.py,sha256=VuH7FOROyGcjMPryejtZC-5Y0QHlVTFLTGUDgNqRAFw,113
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256=trQ7lAdPyNpMrCwdQeq67FCosQuoN3uvLM6lXoJPkQQ,24457
+ opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256=PwpfrZCauun0zwne4Kj1l3V9KQjX5phoz61FFZN9MHk,25660
  opik_optimizer/few_shot_bayesian_optimizer/reporting.py,sha256=j1mNEQyFT7YUVlMU1TxPZxrf5sPwiHZ2nx1fOL4ZIog,4756
  opik_optimizer/meta_prompt_optimizer/__init__.py,sha256=syiN2_fMm5iZDQezZCHYe-ZiGOIPlBkLt49Sa1kuR70,97
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py,sha256=LrN8kGoB-qm5Tvncpmcy2qd79vxkcMokei2sMXrv0jw,34404
- opik_optimizer/meta_prompt_optimizer/reporting.py,sha256=4Lju2uxSBkCVYyJ6ZSS-GjDFVnmP14R6XVtr-tEFlL0,5765
+ opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py,sha256=X5HaT1enrwp_oYK1vuBhaDLBW7A3apP4mlElr0SDQhY,35308
+ opik_optimizer/meta_prompt_optimizer/reporting.py,sha256=IXV1R3T4cdVOJBTEgACQEp5zhrRcDg0DWuebexcfN60,5753
  opik_optimizer/mipro_optimizer/__init__.py,sha256=23dqXp1lG00ZiMZvU75FzzLmzaHe_-5krchwdvMhWzE,53
- opik_optimizer/mipro_optimizer/_lm.py,sha256=bcTy2Y5HjSaFQOATIpUaA86eIp3vKHaMuDI2_RvN2ww,16376
+ opik_optimizer/mipro_optimizer/_lm.py,sha256=hqSdGR0DsuiXAjxfiIFAdFxeG5cfcMtRc7jzXa74fKM,16446
  opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py,sha256=wQP3D5g3X2e0h05vJy_CvW0qDMdGqLYmuUVzdndcScE,39258
- opik_optimizer/mipro_optimizer/mipro_optimizer.py,sha256=pfD8toZVCpqSDdGwyOUvAeyORyGyYqrua71JFzVw2GA,23305
+ opik_optimizer/mipro_optimizer/mipro_optimizer.py,sha256=lQcElTUwkJKKiF6eRuxcRKETq6_TvyuGXLJGw6mIIS4,23303
  opik_optimizer/mipro_optimizer/utils.py,sha256=-d9xOKxmYbKwpNM2aheKQVf3gxCh5B1ENuAvzc38xe8,2509
  opik_optimizer/optimization_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  opik_optimizer/optimization_config/chat_prompt.py,sha256=nGSElB4CyOegYi06oFmaVeFKI6XRv6LJOWJ1axhjyyY,3500
  opik_optimizer/optimization_config/configs.py,sha256=HzpEP84bnqtDs76dtmPGecDQ-Ux2wIk0JVv7A2gsE3k,496
  opik_optimizer/optimization_config/mappers.py,sha256=RMUWwYvXNCJe6w1jYiT6EX218UYZS1PUMMe12OjNEug,1692
- opik_optimizer-0.9.1.dist-info/licenses/LICENSE,sha256=dTRSwwCHdWeSjzodvnivYqcwi8x3Qfr21yv65QUWWBE,1062
- opik_optimizer-0.9.1.dist-info/METADATA,sha256=XodSnPMwsIwJ2WF618unt3iXHoCbYfokqEM7xnN9vW4,6588
- opik_optimizer-0.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- opik_optimizer-0.9.1.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
- opik_optimizer-0.9.1.dist-info/RECORD,,
+ opik_optimizer-0.9.2.dist-info/licenses/LICENSE,sha256=dTRSwwCHdWeSjzodvnivYqcwi8x3Qfr21yv65QUWWBE,1062
+ opik_optimizer-0.9.2.dist-info/METADATA,sha256=e2QQWNkEFwPqHKGAnfBGrR-pQhhR-vOBrrtHaszA3lk,6588
+ opik_optimizer-0.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ opik_optimizer-0.9.2.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
+ opik_optimizer-0.9.2.dist-info/RECORD,,