opik-optimizer 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +37 -19
- opik_optimizer/evolutionary_optimizer/reporting.py +0 -2
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +44 -17
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +42 -26
- opik_optimizer/meta_prompt_optimizer/reporting.py +0 -1
- opik_optimizer/mipro_optimizer/_lm.py +3 -0
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +7 -8
- opik_optimizer/optimization_result.py +8 -9
- opik_optimizer/reporting_utils.py +27 -8
- opik_optimizer/utils.py +10 -28
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-0.9.2.dist-info}/METADATA +1 -1
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-0.9.2.dist-info}/RECORD +15 -15
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-0.9.2.dist-info}/WHEEL +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.9.1.dist-info → opik_optimizer-0.9.2.dist-info}/top_level.txt +0 -0
@@ -828,7 +828,35 @@ Return only the new prompt list object.
|
|
828
828
|
auto_continue: Whether to automatically continue optimization
|
829
829
|
**kwargs: Additional keyword arguments
|
830
830
|
"""
|
831
|
-
|
831
|
+
if not isinstance(prompt, chat_prompt.ChatPrompt):
|
832
|
+
raise ValueError("Prompt must be a ChatPrompt object")
|
833
|
+
|
834
|
+
if not isinstance(dataset, opik.Dataset):
|
835
|
+
raise ValueError("Dataset must be a Dataset object")
|
836
|
+
|
837
|
+
if not isinstance(metric, Callable):
|
838
|
+
raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
|
839
|
+
|
840
|
+
# Step 0. Start Opik optimization run
|
841
|
+
opik_optimization_run: Optional[optimization.Optimization] = None
|
842
|
+
try:
|
843
|
+
opik_optimization_run: optimization.Optimization = self._opik_client.create_optimization(
|
844
|
+
dataset_name=dataset.name,
|
845
|
+
objective_name=metric.__name__,
|
846
|
+
metadata={"optimizer": self.__class__.__name__},
|
847
|
+
)
|
848
|
+
self._current_optimization_id = opik_optimization_run.id
|
849
|
+
except Exception as e:
|
850
|
+
logger.warning(f"Opik server error: {e}. Continuing without Opik tracking.")
|
851
|
+
self._current_optimization_id = None
|
852
|
+
|
853
|
+
reporting.display_header(
|
854
|
+
algorithm=self.__class__.__name__,
|
855
|
+
optimization_id=self._current_optimization_id,
|
856
|
+
dataset_id=dataset.id,
|
857
|
+
verbose=self.verbose
|
858
|
+
)
|
859
|
+
|
832
860
|
reporting.display_configuration(
|
833
861
|
prompt.formatted_messages,
|
834
862
|
{
|
@@ -841,9 +869,9 @@ Return only the new prompt list object.
|
|
841
869
|
verbose=self.verbose
|
842
870
|
)
|
843
871
|
|
872
|
+
# Step 1. Step variables and define fitness function
|
844
873
|
self.llm_call_counter = 0
|
845
874
|
self._history = []
|
846
|
-
self._current_optimization_id = None
|
847
875
|
self._current_generation = 0
|
848
876
|
self._best_fitness_history = []
|
849
877
|
self._generations_without_improvement = 0
|
@@ -851,7 +879,6 @@ Return only the new prompt list object.
|
|
851
879
|
self._current_population = []
|
852
880
|
self._generations_without_overall_improvement = 0
|
853
881
|
|
854
|
-
# Step 0. Define fitness function
|
855
882
|
if self.enable_moo:
|
856
883
|
def _deap_evaluate_individual_fitness(
|
857
884
|
messages: List[Dict[str, str]]
|
@@ -884,19 +911,6 @@ Return only the new prompt list object.
|
|
884
911
|
return (fitness_score,)
|
885
912
|
self.toolbox.register("evaluate", _deap_evaluate_individual_fitness)
|
886
913
|
|
887
|
-
# Step 1. Start Opik optimization run
|
888
|
-
opik_optimization_run: Optional[optimization.Optimization] = None
|
889
|
-
try:
|
890
|
-
opik_optimization_run: optimization.Optimization = self._opik_client.create_optimization(
|
891
|
-
dataset_name=dataset.name,
|
892
|
-
objective_name=metric.__name__,
|
893
|
-
metadata={"optimizer": self.__class__.__name__},
|
894
|
-
)
|
895
|
-
self._current_optimization_id = opik_optimization_run.id
|
896
|
-
logger.info(f"Created Opik Optimization run with ID: {self._current_optimization_id}")
|
897
|
-
except Exception as e:
|
898
|
-
logger.warning(f"Opik server error: {e}. Continuing without Opik tracking.")
|
899
|
-
|
900
914
|
# Step 2. Compute the initial performance of the prompt
|
901
915
|
with reporting.baseline_performance(verbose=self.verbose) as report_baseline_performance:
|
902
916
|
initial_eval_result: Tuple[float, float] | Tuple[float, ] = _deap_evaluate_individual_fitness(prompt.formatted_messages)
|
@@ -976,7 +990,7 @@ Return only the new prompt list object.
|
|
976
990
|
best_prompt=best_prompt_overall,
|
977
991
|
best_score=best_primary_score_overall,
|
978
992
|
improvement=0.0
|
979
|
-
).
|
993
|
+
).model_dump()
|
980
994
|
self._add_to_history(initial_round_data)
|
981
995
|
|
982
996
|
with reporting.start_evolutionary_algo(verbose=self.verbose) as report_evolutionary_algo:
|
@@ -1035,7 +1049,7 @@ Return only the new prompt list object.
|
|
1035
1049
|
best_prompt=best_prompt_overall,
|
1036
1050
|
best_score=best_primary_score_overall,
|
1037
1051
|
improvement=(best_primary_score_overall - initial_primary_score) / abs(initial_primary_score) if initial_primary_score and initial_primary_score != 0 else (1.0 if best_primary_score_overall > 0 else 0.0)
|
1038
|
-
).
|
1052
|
+
).model_dump()
|
1039
1053
|
self._add_to_history(gen_round_data)
|
1040
1054
|
|
1041
1055
|
stopped_early_flag = self._generations_without_overall_improvement >= self.DEFAULT_EARLY_STOPPING_GENERATIONS
|
@@ -1101,6 +1115,7 @@ Return only the new prompt list object.
|
|
1101
1115
|
# Add final details
|
1102
1116
|
final_details.update({
|
1103
1117
|
"total_generations_run": generation_idx + 1,
|
1118
|
+
"num_generations": self.num_generations,
|
1104
1119
|
"population_size": self.population_size,
|
1105
1120
|
"mutation_probability": self.mutation_rate,
|
1106
1121
|
"crossover_probability": self.crossover_rate,
|
@@ -1132,7 +1147,9 @@ Return only the new prompt list object.
|
|
1132
1147
|
return OptimizationResult(
|
1133
1148
|
optimizer=self.__class__.__name__,
|
1134
1149
|
prompt=final_best_prompt.formatted_messages,
|
1135
|
-
score=final_primary_score,
|
1150
|
+
score=final_primary_score,
|
1151
|
+
initial_prompt=prompt.formatted_messages,
|
1152
|
+
initial_score=initial_primary_score,
|
1136
1153
|
metric_name=metric.__name__,
|
1137
1154
|
details=final_details,
|
1138
1155
|
history=self.get_history(),
|
@@ -1186,6 +1203,7 @@ Return only the new prompt list object.
|
|
1186
1203
|
response = litellm.completion(
|
1187
1204
|
model=self.model, messages=messages, **final_call_params
|
1188
1205
|
)
|
1206
|
+
self.llm_call_counter += 1
|
1189
1207
|
|
1190
1208
|
logger.debug(f"Response: {response}")
|
1191
1209
|
return response.choices[0].message.content
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import copy
|
1
2
|
import json
|
2
3
|
import logging
|
3
4
|
import random
|
@@ -194,6 +195,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
194
195
|
|
195
196
|
def _run_optimization(
|
196
197
|
self,
|
198
|
+
initial_prompt: chat_prompt.ChatPrompt,
|
197
199
|
fewshot_prompt_template: FewShotPromptTemplate,
|
198
200
|
dataset: Dataset,
|
199
201
|
metric: Callable,
|
@@ -249,13 +251,14 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
249
251
|
for key, value in example.items():
|
250
252
|
processed_example[key] = str(value)
|
251
253
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
254
|
+
processed_demo_example=fewshot_prompt_template.example_template
|
255
|
+
for key, value in processed_example.items():
|
256
|
+
try:
|
257
|
+
processed_demo_example=processed_demo_example.replace(f"{{{key}}}", str(value))
|
258
|
+
except Exception:
|
259
|
+
logger.error(f"Failed to format fewshot prompt template {fewshot_prompt_template} with example: {processed_example} ")
|
260
|
+
raise
|
261
|
+
processed_demo_examples.append(processed_demo_example)
|
259
262
|
few_shot_examples = "\n\n".join(processed_demo_examples)
|
260
263
|
|
261
264
|
llm_task = self._build_task_from_messages(
|
@@ -364,6 +367,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
364
367
|
best_score = best_trial.value
|
365
368
|
best_example_indices = best_trial.user_attrs.get("example_indices", [])
|
366
369
|
|
370
|
+
if best_score <= baseline_score:
|
371
|
+
best_score = baseline_score
|
372
|
+
best_prompt = initial_prompt.formatted_messages
|
373
|
+
else:
|
374
|
+
best_prompt = best_trial.user_attrs["config"]["message_list"]
|
375
|
+
|
367
376
|
reporting.display_result(
|
368
377
|
initial_score=baseline_score,
|
369
378
|
best_score=best_score,
|
@@ -374,9 +383,12 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
374
383
|
return optimization_result.OptimizationResult(
|
375
384
|
optimizer=self.__class__.__name__,
|
376
385
|
prompt=best_trial.user_attrs["config"]["message_list"],
|
386
|
+
initial_prompt=initial_prompt.formatted_messages,
|
387
|
+
initial_score=baseline_score,
|
377
388
|
score=best_score,
|
378
389
|
metric_name=metric.__name__,
|
379
390
|
details={
|
391
|
+
"initial_score": baseline_score,
|
380
392
|
"chat_messages": best_trial.user_attrs["config"]["message_list"],
|
381
393
|
"prompt_parameter": best_trial.user_attrs["config"],
|
382
394
|
#"n_examples": best_n_examples,
|
@@ -414,6 +426,16 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
414
426
|
Returns:
|
415
427
|
OptimizationResult: Result of the optimization
|
416
428
|
"""
|
429
|
+
if not isinstance(prompt, chat_prompt.ChatPrompt):
|
430
|
+
raise ValueError("Prompt must be a ChatPrompt object")
|
431
|
+
|
432
|
+
if not isinstance(dataset, Dataset):
|
433
|
+
raise ValueError("Dataset must be a Dataset object")
|
434
|
+
|
435
|
+
if not isinstance(metric, Callable):
|
436
|
+
raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
|
437
|
+
|
438
|
+
|
417
439
|
optimization = None
|
418
440
|
try:
|
419
441
|
optimization = self._opik_client.create_optimization(
|
@@ -421,15 +443,22 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
421
443
|
objective_name=metric.__name__,
|
422
444
|
metadata={"optimizer": self.__class__.__name__},
|
423
445
|
)
|
446
|
+
optimization_run_id = optimization.id
|
424
447
|
except Exception:
|
425
448
|
logger.warning(
|
426
449
|
"Opik server does not support optimizations. Please upgrade opik."
|
427
450
|
)
|
428
451
|
optimization = None
|
452
|
+
optimization_run_id = None
|
429
453
|
|
430
454
|
try:
|
431
455
|
# Start experiment reporting
|
432
|
-
reporting.display_header(
|
456
|
+
reporting.display_header(
|
457
|
+
algorithm=self.__class__.__name__,
|
458
|
+
optimization_id=optimization_run_id,
|
459
|
+
dataset_id=dataset.id,
|
460
|
+
verbose=self.verbose
|
461
|
+
)
|
433
462
|
reporting.display_configuration(
|
434
463
|
prompt.formatted_messages,
|
435
464
|
optimizer_config={
|
@@ -468,6 +497,7 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
468
497
|
|
469
498
|
# Step 3. Start the optimization process
|
470
499
|
result = self._run_optimization(
|
500
|
+
initial_prompt=prompt,
|
471
501
|
fewshot_prompt_template=fewshot_template,
|
472
502
|
dataset=dataset,
|
473
503
|
metric=metric,
|
@@ -563,18 +593,15 @@ class FewShotBayesianOptimizer(base_optimizer.BaseOptimizer):
|
|
563
593
|
self, messages: List[Dict[str, str]], few_shot_examples: Optional[str] = None
|
564
594
|
):
|
565
595
|
def llm_task(dataset_item: Dict[str, Any]) -> Dict[str, Any]:
|
596
|
+
prompt_ = copy.deepcopy(messages)
|
566
597
|
for key, value in dataset_item.items():
|
567
|
-
|
568
|
-
"
|
569
|
-
"content": item["content"].replace("{" + key + "}", str(value))
|
570
|
-
} for item in messages]
|
598
|
+
for item in prompt_:
|
599
|
+
item["content"] = item["content"].replace("{" + key + "}", str(value))
|
571
600
|
|
572
601
|
if few_shot_examples:
|
573
|
-
|
574
|
-
"
|
575
|
-
|
576
|
-
} for item in prompt_]
|
577
|
-
|
602
|
+
for item in prompt_:
|
603
|
+
item["content"] = item["content"].replace(FEW_SHOT_EXAMPLE_PLACEHOLDER, few_shot_examples)
|
604
|
+
|
578
605
|
response = self._call_model(
|
579
606
|
model=self.model,
|
580
607
|
messages=prompt_,
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import json
|
2
|
+
import copy
|
2
3
|
import logging
|
3
4
|
import os
|
4
5
|
from typing import Any, Callable, Dict, List, Optional, overload
|
@@ -255,7 +256,7 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
255
256
|
**experiment_config,
|
256
257
|
**{
|
257
258
|
"optimizer": self.__class__.__name__,
|
258
|
-
"metric": metric
|
259
|
+
"metric": getattr(metric, '__name__', str(metric)),
|
259
260
|
"dataset": dataset.name,
|
260
261
|
"configuration": {
|
261
262
|
"prompt": prompt.formatted_messages,
|
@@ -300,7 +301,7 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
300
301
|
|
301
302
|
# Use dataset's get_items with limit for sampling
|
302
303
|
logger.debug(
|
303
|
-
f"Starting evaluation with {subset_size if subset_size else 'all'} samples for metric: {metric
|
304
|
+
f"Starting evaluation with {subset_size if subset_size else 'all'} samples for metric: {getattr(metric, '__name__', str(metric))}"
|
304
305
|
)
|
305
306
|
score = task_evaluator.evaluate(
|
306
307
|
dataset=dataset,
|
@@ -341,8 +342,15 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
341
342
|
Returns:
|
342
343
|
OptimizationResult: Structured result containing optimization details
|
343
344
|
"""
|
344
|
-
|
345
|
+
if not isinstance(prompt, chat_prompt.ChatPrompt):
|
346
|
+
raise ValueError("Prompt must be a ChatPrompt object")
|
345
347
|
|
348
|
+
if not isinstance(dataset, Dataset):
|
349
|
+
raise ValueError("Dataset must be a Dataset object")
|
350
|
+
|
351
|
+
if not isinstance(metric, Callable):
|
352
|
+
raise ValueError("Metric must be a function that takes `dataset_item` and `llm_output` as arguments.")
|
353
|
+
|
346
354
|
total_items = len(dataset.get_items())
|
347
355
|
if n_samples is not None and n_samples > total_items:
|
348
356
|
logger.warning(
|
@@ -350,21 +358,12 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
350
358
|
)
|
351
359
|
n_samples = None
|
352
360
|
|
353
|
-
|
354
|
-
messages=prompt.formatted_messages,
|
355
|
-
optimizer_config={
|
356
|
-
"optimizer": self.__class__.__name__,
|
357
|
-
"n_samples": n_samples,
|
358
|
-
"auto_continue": auto_continue
|
359
|
-
},
|
360
|
-
verbose=self.verbose
|
361
|
-
)
|
362
|
-
|
361
|
+
|
363
362
|
optimization = None
|
364
363
|
try:
|
365
364
|
optimization = self._opik_client.create_optimization(
|
366
365
|
dataset_name=dataset.name,
|
367
|
-
objective_name=metric
|
366
|
+
objective_name=getattr(metric, '__name__', str(metric)),
|
368
367
|
metadata={"optimizer": self.__class__.__name__},
|
369
368
|
)
|
370
369
|
logger.debug(f"Created optimization with ID: {optimization.id}")
|
@@ -374,6 +373,22 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
374
373
|
)
|
375
374
|
optimization = None
|
376
375
|
|
376
|
+
reporting.display_header(
|
377
|
+
algorithm=self.__class__.__name__,
|
378
|
+
optimization_id=optimization.id if optimization is not None else None,
|
379
|
+
dataset_id=dataset.id,
|
380
|
+
verbose=self.verbose
|
381
|
+
)
|
382
|
+
reporting.display_configuration(
|
383
|
+
messages=prompt.formatted_messages,
|
384
|
+
optimizer_config={
|
385
|
+
"optimizer": self.__class__.__name__,
|
386
|
+
"n_samples": n_samples,
|
387
|
+
"auto_continue": auto_continue
|
388
|
+
},
|
389
|
+
verbose=self.verbose
|
390
|
+
)
|
391
|
+
|
377
392
|
try:
|
378
393
|
result = self._optimize_prompt(
|
379
394
|
optimization_id=optimization.id if optimization is not None else None,
|
@@ -411,6 +426,7 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
411
426
|
self.dataset = dataset
|
412
427
|
self.prompt = prompt
|
413
428
|
self.llm_call_counter = 0 # Reset counter for run
|
429
|
+
initial_prompt: List[Dict[str, str]] = prompt.formatted_messages
|
414
430
|
|
415
431
|
current_prompt = prompt.formatted_messages
|
416
432
|
experiment_config = experiment_config or {}
|
@@ -418,7 +434,7 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
418
434
|
**experiment_config,
|
419
435
|
**{
|
420
436
|
"optimizer": self.__class__.__name__,
|
421
|
-
"metric": metric
|
437
|
+
"metric": getattr(metric, '__name__', str(metric)),
|
422
438
|
"dataset": self.dataset.name,
|
423
439
|
"configuration": {
|
424
440
|
"prompt": current_prompt,
|
@@ -527,11 +543,11 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
527
543
|
|
528
544
|
return self._create_result(
|
529
545
|
metric,
|
530
|
-
|
531
|
-
best_prompt,
|
532
|
-
best_score,
|
533
|
-
initial_score,
|
534
|
-
rounds,
|
546
|
+
initial_prompt=initial_prompt,
|
547
|
+
best_prompt=best_prompt,
|
548
|
+
best_score=best_score,
|
549
|
+
initial_score=initial_score,
|
550
|
+
rounds=rounds,
|
535
551
|
)
|
536
552
|
|
537
553
|
def _calculate_improvement(
|
@@ -581,21 +597,19 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
581
597
|
def _create_result(
|
582
598
|
self,
|
583
599
|
metric: Callable,
|
584
|
-
|
585
|
-
best_prompt: str,
|
600
|
+
initial_prompt: List[Dict[str, str]],
|
601
|
+
best_prompt: List[Dict[str, str]],
|
586
602
|
best_score: float,
|
587
603
|
initial_score: float,
|
588
604
|
rounds: List[OptimizationRound],
|
589
605
|
) -> OptimizationResult:
|
590
606
|
"""Create the final OptimizationResult object."""
|
591
607
|
details = {
|
592
|
-
"initial_prompt": prompt,
|
593
|
-
"initial_score": initial_score,
|
594
608
|
"final_prompt": best_prompt,
|
595
609
|
"final_score": best_score,
|
596
610
|
"rounds": rounds,
|
597
611
|
"total_rounds": len(rounds),
|
598
|
-
"metric_name": metric
|
612
|
+
"metric_name": getattr(metric, '__name__', str(metric)),
|
599
613
|
"model": self.model,
|
600
614
|
"temperature": self.model_kwargs.get("temperature"),
|
601
615
|
}
|
@@ -604,7 +618,9 @@ class MetaPromptOptimizer(BaseOptimizer):
|
|
604
618
|
optimizer=self.__class__.__name__,
|
605
619
|
prompt=best_prompt,
|
606
620
|
score=best_score,
|
607
|
-
|
621
|
+
initial_prompt=initial_prompt,
|
622
|
+
initial_score=initial_score,
|
623
|
+
metric_name=getattr(metric, '__name__', str(metric)),
|
608
624
|
details=details,
|
609
625
|
llm_calls=self.llm_call_counter
|
610
626
|
)
|
@@ -82,6 +82,7 @@ class LM(BaseLM):
|
|
82
82
|
self.finetuning_model = finetuning_model
|
83
83
|
self.launch_kwargs = launch_kwargs or {}
|
84
84
|
self.train_kwargs = train_kwargs or {}
|
85
|
+
self.llm_call_counter = 0
|
85
86
|
|
86
87
|
# Handle model-specific configuration for different model families
|
87
88
|
model_family = model.split("/")[-1].lower() if "/" in model else model.lower()
|
@@ -129,6 +130,7 @@ class LM(BaseLM):
|
|
129
130
|
if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
|
130
131
|
settings.usage_tracker.add_usage(self.model, dict(results.usage))
|
131
132
|
|
133
|
+
self.llm_call_counter += 1
|
132
134
|
return results
|
133
135
|
|
134
136
|
def launch(self, launch_kwargs: Optional[Dict[str, Any]] = None):
|
@@ -323,6 +325,7 @@ def litellm_completion(request: Dict[str, Any], num_retries: int, cache={"no-cac
|
|
323
325
|
**retry_kwargs,
|
324
326
|
**request,
|
325
327
|
)
|
328
|
+
|
326
329
|
chunks = []
|
327
330
|
async for chunk in response:
|
328
331
|
if caller_predict_id:
|
@@ -42,11 +42,10 @@ class MiproOptimizer(BaseOptimizer):
|
|
42
42
|
self.tools = []
|
43
43
|
self.num_threads = self.model_kwargs.pop("num_threads", 6)
|
44
44
|
self.model_kwargs["model"] = self.model
|
45
|
-
self.llm_call_counter = 0
|
46
45
|
# FIXME: add mipro_optimizer=True - It does not count the LLM calls made internally by DSPy during MiproOptimizer.optimizer.compile().
|
47
|
-
lm = LM(**self.model_kwargs)
|
46
|
+
self.lm = LM(**self.model_kwargs)
|
48
47
|
opik_callback = OpikCallback(project_name=self.project_name, log_graph=True)
|
49
|
-
dspy.configure(lm=lm, callbacks=[opik_callback])
|
48
|
+
dspy.configure(lm=self.lm, callbacks=[opik_callback])
|
50
49
|
logger.debug(f"Initialized MiproOptimizer with model: {model}")
|
51
50
|
|
52
51
|
def evaluate_prompt(
|
@@ -54,7 +53,7 @@ class MiproOptimizer(BaseOptimizer):
|
|
54
53
|
dataset: Union[str, Dataset],
|
55
54
|
metric: Callable,
|
56
55
|
task_config: TaskConfig,
|
57
|
-
prompt: Union[str, dspy.Module, OptimizationResult] = None,
|
56
|
+
prompt: Optional[Union[str, dspy.Module, OptimizationResult]] = None,
|
58
57
|
n_samples: int = 10,
|
59
58
|
dataset_item_ids: Optional[List[str]] = None,
|
60
59
|
experiment_config: Optional[Dict] = None,
|
@@ -463,7 +462,7 @@ class MiproOptimizer(BaseOptimizer):
|
|
463
462
|
metric_name=self.opik_metric.__name__ if hasattr(self, 'opik_metric') else "unknown_metric",
|
464
463
|
details={"error": "No candidate programs generated by MIPRO"},
|
465
464
|
history=mipro_history_processed,
|
466
|
-
llm_calls=self.llm_call_counter
|
465
|
+
llm_calls=self.lm.llm_call_counter
|
467
466
|
)
|
468
467
|
|
469
468
|
self.module = self.get_best().details["program"]
|
@@ -488,7 +487,7 @@ class MiproOptimizer(BaseOptimizer):
|
|
488
487
|
demonstrations=best_program_details.demonstrations,
|
489
488
|
details=best_program_details.details,
|
490
489
|
history=mipro_history_processed,
|
491
|
-
llm_calls=self.llm_call_counter
|
490
|
+
llm_calls=self.lm.llm_call_counter
|
492
491
|
)
|
493
492
|
|
494
493
|
def get_best(self, position: int = 0) -> OptimizationResult:
|
@@ -501,7 +500,7 @@ class MiproOptimizer(BaseOptimizer):
|
|
501
500
|
metric_name=getattr(self, 'opik_metric', None).name if hasattr(self, 'opik_metric') and self.opik_metric else "unknown_metric",
|
502
501
|
details={"error": "No programs generated or compile failed"},
|
503
502
|
history=[],
|
504
|
-
llm_calls=self.llm_call_counter
|
503
|
+
llm_calls=self.lm.llm_call_counter
|
505
504
|
)
|
506
505
|
|
507
506
|
score = self.best_programs[position]["score"]
|
@@ -528,5 +527,5 @@ class MiproOptimizer(BaseOptimizer):
|
|
528
527
|
metric_name=self.opik_metric.__name__,
|
529
528
|
demonstrations=demos,
|
530
529
|
details={"program": program_module},
|
531
|
-
llm_calls=self.llm_call_counter
|
530
|
+
llm_calls=self.lm.llm_call_counter
|
532
531
|
)
|
@@ -17,6 +17,10 @@ class OptimizationResult(pydantic.BaseModel):
|
|
17
17
|
score: float
|
18
18
|
metric_name: str
|
19
19
|
|
20
|
+
# Initial score
|
21
|
+
initial_prompt: Optional[List[Dict[Literal["role", "content"], str]]] = None
|
22
|
+
initial_score: Optional[float] = None
|
23
|
+
|
20
24
|
details: Dict[str, Any] = pydantic.Field(default_factory=dict)
|
21
25
|
history: List[Dict[str, Any]] = []
|
22
26
|
llm_calls: Optional[int] = None
|
@@ -33,7 +37,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
33
37
|
|
34
38
|
def _calculate_improvement_str(self) -> str:
|
35
39
|
"""Helper to calculate improvement percentage string."""
|
36
|
-
initial_s = self.
|
40
|
+
initial_s = self.initial_score
|
37
41
|
final_s = self.score
|
38
42
|
|
39
43
|
# Check if initial score exists and is a number
|
@@ -60,7 +64,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
60
64
|
"""Provides a clean, well-formatted plain-text summary."""
|
61
65
|
separator = "=" * 80
|
62
66
|
rounds_ran = len(self.details.get("rounds", []))
|
63
|
-
initial_score = self.
|
67
|
+
initial_score = self.initial_score
|
64
68
|
initial_score_str = (
|
65
69
|
f"{initial_score:.4f}" if isinstance(initial_score, (int, float)) else "N/A"
|
66
70
|
)
|
@@ -74,7 +78,6 @@ class OptimizationResult(pydantic.BaseModel):
|
|
74
78
|
.replace("[dim]", "")
|
75
79
|
.replace("[/dim]", "")
|
76
80
|
)
|
77
|
-
stopped_early = self.details.get("stopped_early", "N/A")
|
78
81
|
|
79
82
|
model_name = self.details.get("model", "N/A")
|
80
83
|
temp = self.details.get("temperature")
|
@@ -101,7 +104,6 @@ class OptimizationResult(pydantic.BaseModel):
|
|
101
104
|
f"Final Best Score: {final_score_str}",
|
102
105
|
f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
|
103
106
|
f"Rounds Completed: {rounds_ran}",
|
104
|
-
f"Stopped Early: {stopped_early}",
|
105
107
|
"\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
|
106
108
|
"--------------------------------------------------------------------------------",
|
107
109
|
f"{final_prompt_display}",
|
@@ -114,7 +116,7 @@ class OptimizationResult(pydantic.BaseModel):
|
|
114
116
|
"""Provides a rich, formatted output for terminals supporting Rich."""
|
115
117
|
improvement_str = self._calculate_improvement_str()
|
116
118
|
rounds_ran = len(self.details.get("rounds", []))
|
117
|
-
initial_score = self.
|
119
|
+
initial_score = self.initial_score
|
118
120
|
initial_score_str = (
|
119
121
|
f"{initial_score:.4f}"
|
120
122
|
if isinstance(initial_score, (int, float))
|
@@ -124,8 +126,6 @@ class OptimizationResult(pydantic.BaseModel):
|
|
124
126
|
stopped_early = self.details.get("stopped_early", "N/A")
|
125
127
|
|
126
128
|
model_name = self.details.get("model", "[dim]N/A[/dim]")
|
127
|
-
temp = self.details.get("temperature")
|
128
|
-
temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "[dim]N/A[/dim]"
|
129
129
|
|
130
130
|
table = rich.table.Table.grid(padding=(0, 1))
|
131
131
|
table.add_column(style="dim")
|
@@ -135,13 +135,12 @@ class OptimizationResult(pydantic.BaseModel):
|
|
135
135
|
"Optimizer:",
|
136
136
|
f"[bold]{self.optimizer}[/bold]",
|
137
137
|
)
|
138
|
-
table.add_row("Model Used:", f"{model_name}
|
138
|
+
table.add_row("Model Used:", f"{model_name}")
|
139
139
|
table.add_row("Metric Evaluated:", f"[bold]{self.metric_name}[/bold]")
|
140
140
|
table.add_row("Initial Score:", initial_score_str)
|
141
141
|
table.add_row("Final Best Score:", f"[bold cyan]{final_score_str}[/bold cyan]")
|
142
142
|
table.add_row("Total Improvement:", improvement_str)
|
143
143
|
table.add_row("Rounds Completed:", str(rounds_ran))
|
144
|
-
table.add_row("Stopped Early:", str(stopped_early))
|
145
144
|
|
146
145
|
# Display Chat Structure if available
|
147
146
|
panel_title = "[bold]Final Optimized Prompt[/bold]"
|
@@ -2,13 +2,14 @@ import logging
|
|
2
2
|
from contextlib import contextmanager
|
3
3
|
from typing import Dict, List, Optional
|
4
4
|
|
5
|
-
import rich
|
6
5
|
from rich import box
|
7
6
|
from rich.console import Console, Group
|
8
7
|
from rich.panel import Panel
|
9
8
|
from rich.progress import track
|
10
9
|
from rich.text import Text
|
11
10
|
|
11
|
+
from .utils import get_optimization_run_url_by_id
|
12
|
+
|
12
13
|
PANEL_WIDTH = 70
|
13
14
|
|
14
15
|
def get_console(*args, **kwargs):
|
@@ -21,10 +22,8 @@ def convert_tqdm_to_rich(description: Optional[str] = None, verbose: int = 1):
|
|
21
22
|
"""Context manager to convert tqdm to rich."""
|
22
23
|
import opik.evaluation.engine.evaluation_tasks_executor
|
23
24
|
|
24
|
-
optimizer_logger = logging.getLogger('opik_optimizer')
|
25
|
-
|
26
25
|
def _tqdm_to_track(iterable, desc, disable, total):
|
27
|
-
disable = verbose == 0
|
26
|
+
disable = verbose == 0
|
28
27
|
return track(
|
29
28
|
iterable,
|
30
29
|
description=description or desc,
|
@@ -91,16 +90,36 @@ def display_messages(messages: List[Dict[str, str]], prefix: str = ""):
|
|
91
90
|
for line in rendered_panel.splitlines():
|
92
91
|
console.print(Text(prefix) + Text.from_ansi(line))
|
93
92
|
|
94
|
-
def display_header(
|
93
|
+
def display_header(
|
94
|
+
algorithm: str,
|
95
|
+
optimization_id: Optional[str]=None,
|
96
|
+
dataset_id: Optional[str]=None,
|
97
|
+
verbose: int = 1
|
98
|
+
):
|
95
99
|
if verbose < 1:
|
96
100
|
return
|
97
101
|
|
102
|
+
if optimization_id is not None and dataset_id is not None:
|
103
|
+
optimization_url = get_optimization_run_url_by_id(
|
104
|
+
optimization_id=optimization_id,
|
105
|
+
dataset_id=dataset_id
|
106
|
+
)
|
107
|
+
|
108
|
+
# Create a visually appealing panel with an icon and ensure link doesn't wrap
|
109
|
+
|
110
|
+
link_text = Text("-> View optimization details in your Opik dashboard")
|
111
|
+
link_text.stylize(f"link {optimization_url}", 28, len(link_text))
|
112
|
+
else:
|
113
|
+
link_text = Text("No optimization run link available", style="dim")
|
114
|
+
|
98
115
|
content = Text.assemble(
|
99
116
|
("● ", "green"),
|
100
117
|
"Running Opik Evaluation - ",
|
101
|
-
(algorithm, "blue")
|
102
|
-
|
118
|
+
(algorithm, "blue"),
|
119
|
+
"\n\n"
|
120
|
+
).append(link_text)
|
103
121
|
|
122
|
+
|
104
123
|
panel = Panel(
|
105
124
|
content,
|
106
125
|
box=box.ROUNDED,
|
@@ -126,7 +145,7 @@ def display_result(initial_score, best_score, best_prompt, verbose: int = 1):
|
|
126
145
|
perc_change = (best_score - initial_score) / initial_score
|
127
146
|
content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})", style="bold green")]
|
128
147
|
else:
|
129
|
-
content = [Text("Optimization
|
148
|
+
content = [Text(f"Optimization run did not find a better prompt than the initial one.\nScore: {best_score:.4f}", style="dim bold red")]
|
130
149
|
|
131
150
|
content.append(Text("\nOptimized prompt:"))
|
132
151
|
for i, msg in enumerate(best_prompt):
|
opik_optimizer/utils.py
CHANGED
@@ -1,23 +1,17 @@
|
|
1
1
|
"""Utility functions and constants for the optimizer package."""
|
2
2
|
|
3
|
-
|
4
|
-
from types import TracebackType
|
5
|
-
|
6
|
-
import opik
|
7
|
-
from opik.api_objects.opik_client import Opik
|
8
|
-
from opik.api_objects.optimization import Optimization
|
9
|
-
|
3
|
+
import base64
|
10
4
|
import json
|
11
5
|
import logging
|
12
6
|
import random
|
13
7
|
import string
|
14
|
-
import base64
|
15
8
|
import urllib.parse
|
16
|
-
from
|
9
|
+
from types import TracebackType
|
10
|
+
from typing import Any, Dict, Final, Literal, Optional, Type
|
17
11
|
|
18
|
-
|
19
|
-
|
20
|
-
|
12
|
+
import opik
|
13
|
+
from opik.api_objects.opik_client import Opik
|
14
|
+
from opik.api_objects.optimization import Optimization
|
21
15
|
|
22
16
|
ALLOWED_URL_CHARACTERS: Final[str] = ":/&?="
|
23
17
|
logger = logging.getLogger(__name__)
|
@@ -63,6 +57,7 @@ class OptimizationContextManager:
|
|
63
57
|
name=self.name,
|
64
58
|
metadata=self.metadata,
|
65
59
|
)
|
60
|
+
|
66
61
|
if self.optimization:
|
67
62
|
return self.optimization
|
68
63
|
else:
|
@@ -238,8 +233,10 @@ def ensure_ending_slash(url: str) -> str:
|
|
238
233
|
|
239
234
|
|
240
235
|
def get_optimization_run_url_by_id(
|
241
|
-
dataset_id: str, optimization_id: str
|
236
|
+
dataset_id: str, optimization_id: str
|
242
237
|
) -> str:
|
238
|
+
opik_config = opik.config.get_from_user_inputs()
|
239
|
+
url_override = opik_config.url_override
|
243
240
|
encoded_opik_url = base64.b64encode(url_override.encode("utf-8")).decode("utf-8")
|
244
241
|
|
245
242
|
run_path = urllib.parse.quote(
|
@@ -247,18 +244,3 @@ def get_optimization_run_url_by_id(
|
|
247
244
|
safe=ALLOWED_URL_CHARACTERS,
|
248
245
|
)
|
249
246
|
return urllib.parse.urljoin(ensure_ending_slash(url_override), run_path)
|
250
|
-
|
251
|
-
|
252
|
-
def display_optimization_run_link(
|
253
|
-
optimization_id: str, dataset_id: str, url_override: str
|
254
|
-
) -> None:
|
255
|
-
console_container = console.Console()
|
256
|
-
|
257
|
-
optimization_url = get_optimization_run_url_by_id(
|
258
|
-
optimization_id=optimization_id,
|
259
|
-
dataset_id=dataset_id,
|
260
|
-
url_override=url_override,
|
261
|
-
)
|
262
|
-
console_container.print(
|
263
|
-
f"View the optimization run [link={optimization_url}]in your Opik dashboard[/link]."
|
264
|
-
)
|
@@ -3,10 +3,10 @@ opik_optimizer/_throttle.py,sha256=ztub8qlwz4u0GVA2TIoLig0D1Cs0hJ7_o_SnT_C7Nmk,1
|
|
3
3
|
opik_optimizer/base_optimizer.py,sha256=mg5D5m2hIzq3XbVjRhx8c_HuXWZWaRE2J6QtkHnxkqE,4439
|
4
4
|
opik_optimizer/cache_config.py,sha256=EzF4RAzxhSG8vtMJANdiUpNHQ9HzL2CrCXp0iik0f4A,580
|
5
5
|
opik_optimizer/logging_config.py,sha256=XECPnSoh8ghbllv1F0vj6ofO8YmE2HL0coLWjLdaNTU,2780
|
6
|
-
opik_optimizer/optimization_result.py,sha256=
|
7
|
-
opik_optimizer/reporting_utils.py,sha256=
|
6
|
+
opik_optimizer/optimization_result.py,sha256=p_vHW5NtdeJHJ9KpmbM8JMd3CqG_wd02XzClRL7mMgs,7270
|
7
|
+
opik_optimizer/reporting_utils.py,sha256=dL1u8Wpo7S9H480FxDENsxFe301PdTqHcXbFNqqXdik,6175
|
8
8
|
opik_optimizer/task_evaluator.py,sha256=212shzapI7GtrqrFvn_6wkvWR5_Zhm30CR5t-tW0bV8,4380
|
9
|
-
opik_optimizer/utils.py,sha256=
|
9
|
+
opik_optimizer/utils.py,sha256=mf-773TJH5Cm6cQXcsntEOQ-6WEaYUaILIonuaEErmo,7707
|
10
10
|
opik_optimizer/data/hotpot-500.json,sha256=YXxCtuvYvxSu5u0y4559a6b1qwgAYsWzT_SUKv_21ew,76862
|
11
11
|
opik_optimizer/datasets/__init__.py,sha256=V4LVDOaRjwzaYvhdQ3V6CAwFaeKnxyTV1lp_ES9Z31E,691
|
12
12
|
opik_optimizer/datasets/ai2_arc.py,sha256=PMWInWVRPQ9u_nlr9N531CeVKjI6y_ZSQmNY2t1zwOI,1401
|
@@ -24,25 +24,25 @@ opik_optimizer/demo/__init__.py,sha256=KSpFYhzN7fTmLEsIaciRHwxcJDeAiX5NDmYLdPsfp
|
|
24
24
|
opik_optimizer/demo/cache.py,sha256=5WqK8rSiijzU6s4VHIjLuL1LR5i1yHtY-x5FZTduSus,3669
|
25
25
|
opik_optimizer/demo/datasets.py,sha256=MezQlG4Q_cgSH7zQOmJcDwkGU8JV0xKSnZwCJGaj-88,2494
|
26
26
|
opik_optimizer/evolutionary_optimizer/__init__.py,sha256=OQ2ART5g-7EVGOISvTGY-AbmEyyDFEJJCsmJBzGJIpw,57
|
27
|
-
opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py,sha256=
|
28
|
-
opik_optimizer/evolutionary_optimizer/reporting.py,sha256=
|
27
|
+
opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py,sha256=tZLID_N4auZ7v7MWZesr9xbzRMudznAUQ8Q9XzhbqRY,76948
|
28
|
+
opik_optimizer/evolutionary_optimizer/reporting.py,sha256=w5fWIA9F30a25E5BpPxKLubNitcXmU5KzeBdS86Ajao,9899
|
29
29
|
opik_optimizer/few_shot_bayesian_optimizer/__init__.py,sha256=VuH7FOROyGcjMPryejtZC-5Y0QHlVTFLTGUDgNqRAFw,113
|
30
|
-
opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256=
|
30
|
+
opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py,sha256=PwpfrZCauun0zwne4Kj1l3V9KQjX5phoz61FFZN9MHk,25660
|
31
31
|
opik_optimizer/few_shot_bayesian_optimizer/reporting.py,sha256=j1mNEQyFT7YUVlMU1TxPZxrf5sPwiHZ2nx1fOL4ZIog,4756
|
32
32
|
opik_optimizer/meta_prompt_optimizer/__init__.py,sha256=syiN2_fMm5iZDQezZCHYe-ZiGOIPlBkLt49Sa1kuR70,97
|
33
|
-
opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py,sha256=
|
34
|
-
opik_optimizer/meta_prompt_optimizer/reporting.py,sha256=
|
33
|
+
opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py,sha256=X5HaT1enrwp_oYK1vuBhaDLBW7A3apP4mlElr0SDQhY,35308
|
34
|
+
opik_optimizer/meta_prompt_optimizer/reporting.py,sha256=IXV1R3T4cdVOJBTEgACQEp5zhrRcDg0DWuebexcfN60,5753
|
35
35
|
opik_optimizer/mipro_optimizer/__init__.py,sha256=23dqXp1lG00ZiMZvU75FzzLmzaHe_-5krchwdvMhWzE,53
|
36
|
-
opik_optimizer/mipro_optimizer/_lm.py,sha256=
|
36
|
+
opik_optimizer/mipro_optimizer/_lm.py,sha256=hqSdGR0DsuiXAjxfiIFAdFxeG5cfcMtRc7jzXa74fKM,16446
|
37
37
|
opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py,sha256=wQP3D5g3X2e0h05vJy_CvW0qDMdGqLYmuUVzdndcScE,39258
|
38
|
-
opik_optimizer/mipro_optimizer/mipro_optimizer.py,sha256=
|
38
|
+
opik_optimizer/mipro_optimizer/mipro_optimizer.py,sha256=lQcElTUwkJKKiF6eRuxcRKETq6_TvyuGXLJGw6mIIS4,23303
|
39
39
|
opik_optimizer/mipro_optimizer/utils.py,sha256=-d9xOKxmYbKwpNM2aheKQVf3gxCh5B1ENuAvzc38xe8,2509
|
40
40
|
opik_optimizer/optimization_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
41
41
|
opik_optimizer/optimization_config/chat_prompt.py,sha256=nGSElB4CyOegYi06oFmaVeFKI6XRv6LJOWJ1axhjyyY,3500
|
42
42
|
opik_optimizer/optimization_config/configs.py,sha256=HzpEP84bnqtDs76dtmPGecDQ-Ux2wIk0JVv7A2gsE3k,496
|
43
43
|
opik_optimizer/optimization_config/mappers.py,sha256=RMUWwYvXNCJe6w1jYiT6EX218UYZS1PUMMe12OjNEug,1692
|
44
|
-
opik_optimizer-0.9.
|
45
|
-
opik_optimizer-0.9.
|
46
|
-
opik_optimizer-0.9.
|
47
|
-
opik_optimizer-0.9.
|
48
|
-
opik_optimizer-0.9.
|
44
|
+
opik_optimizer-0.9.2.dist-info/licenses/LICENSE,sha256=dTRSwwCHdWeSjzodvnivYqcwi8x3Qfr21yv65QUWWBE,1062
|
45
|
+
opik_optimizer-0.9.2.dist-info/METADATA,sha256=e2QQWNkEFwPqHKGAnfBGrR-pQhhR-vOBrrtHaszA3lk,6588
|
46
|
+
opik_optimizer-0.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
47
|
+
opik_optimizer-0.9.2.dist-info/top_level.txt,sha256=ondOlpq6_yFckqpxoAHSfzZS2N-JfgmA-QQhOJfz7m0,15
|
48
|
+
opik_optimizer-0.9.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|