opik-optimizer 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. opik_optimizer/__init__.py +15 -26
  2. opik_optimizer/base_optimizer.py +28 -44
  3. opik_optimizer/datasets/__init__.py +6 -7
  4. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +742 -726
  5. opik_optimizer/evolutionary_optimizer/reporting.py +246 -0
  6. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +297 -193
  7. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +119 -0
  8. opik_optimizer/meta_prompt_optimizer/__init__.py +5 -0
  9. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +816 -0
  10. opik_optimizer/meta_prompt_optimizer/reporting.py +140 -0
  11. opik_optimizer/mipro_optimizer/__init__.py +1 -1
  12. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +12 -20
  13. opik_optimizer/mipro_optimizer/mipro_optimizer.py +32 -52
  14. opik_optimizer/mipro_optimizer/utils.py +1 -23
  15. opik_optimizer/optimization_config/chat_prompt.py +106 -0
  16. opik_optimizer/optimization_config/configs.py +2 -21
  17. opik_optimizer/optimization_config/mappers.py +1 -1
  18. opik_optimizer/optimization_result.py +57 -85
  19. opik_optimizer/reporting_utils.py +180 -0
  20. opik_optimizer/task_evaluator.py +41 -26
  21. opik_optimizer/utils.py +187 -3
  22. {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0.dist-info}/METADATA +15 -31
  23. opik_optimizer-0.9.0.dist-info/RECORD +48 -0
  24. {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0.dist-info}/WHEEL +1 -1
  25. opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +0 -91
  26. opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +0 -80
  27. opik_optimizer/integrations/__init__.py +0 -0
  28. opik_optimizer/meta_prompt_optimizer.py +0 -1151
  29. opik_optimizer-0.8.1.dist-info/RECORD +0 -45
  30. {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0.dist-info}/licenses/LICENSE +0 -0
  31. {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0.dist-info}/top_level.txt +0 -0
opik_optimizer/__init__.py
@@ -1,47 +1,36 @@
 import importlib.metadata
 import logging
-from .logging_config import setup_logging
 
-__version__ = importlib.metadata.version("opik_optimizer")
+from opik.evaluation.models.litellm import warning_filters
 
-# Using WARNING as a sensible default to avoid flooding users with INFO/DEBUG
-setup_logging(level=logging.WARNING)
+from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
 
-# Regular imports
-from .mipro_optimizer import MiproOptimizer
+from . import datasets
 from .base_optimizer import BaseOptimizer
-from .meta_prompt_optimizer import MetaPromptOptimizer
 from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
-from .optimization_config.configs import (
-    MetricConfig,
-    OptimizationConfig,
-    TaskConfig,
-)
-from .optimization_config.mappers import (
-    from_dataset_field,
-    from_llm_response_text,
-)
+from .logging_config import setup_logging
+from .meta_prompt_optimizer import MetaPromptOptimizer
+from .mipro_optimizer import MiproOptimizer
+from .optimization_config.chat_prompt import ChatPrompt
+from .optimization_config.configs import TaskConfig
+from .optimization_result import OptimizationResult
 
-from opik.evaluation.models.litellm import warning_filters
-from . import datasets
+__version__ = importlib.metadata.version("opik_optimizer")
 
-warning_filters.add_warning_filters()
+# Using WARNING as a sensible default to avoid flooding users with INFO/DEBUG
+setup_logging(level=logging.WARNING)
 
-from .optimization_result import OptimizationResult
-from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
+warning_filters.add_warning_filters()
 
 __all__ = [
     "BaseOptimizer",
+    "ChatPrompt",
     "FewShotBayesianOptimizer",
     "MetaPromptOptimizer",
     "MiproOptimizer",
     "EvolutionaryOptimizer",
-    "MetricConfig",
-    "OptimizationConfig",
-    "TaskConfig",
-    "from_dataset_field",
-    "from_llm_response_text",
     "OptimizationResult",
     "setup_logging",
     "datasets",
+    "TaskConfig"
 ]
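For orientation, the hunk above reshapes the package's public surface: the config-object exports (MetricConfig, OptimizationConfig, and the mapper helpers) are dropped, ChatPrompt is newly exported, and the imports are sorted. A minimal sketch of importing the 0.9.0 top-level API, using only the names listed in __all__ above; the printed version string is illustrative:

import opik_optimizer
from opik_optimizer import (
    ChatPrompt,
    EvolutionaryOptimizer,
    FewShotBayesianOptimizer,
    MetaPromptOptimizer,
    MiproOptimizer,
    OptimizationResult,
    TaskConfig,
    datasets,
)

# __version__ is resolved via importlib.metadata at import time.
print(opik_optimizer.__version__)  # e.g. "0.9.0"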
opik_optimizer/base_optimizer.py
@@ -1,16 +1,16 @@
-from typing import Optional, Union, List, Dict, Any
-import opik
 import logging
 import time
+from abc import abstractmethod
+from typing import Any, Callable, Dict, List, Optional
 
 import litellm
-from . import _throttle
+import opik
 from opik.rest_api.core import ApiError
-
 from pydantic import BaseModel
+
+from . import _throttle, optimization_result
 from .cache_config import initialize_cache
-from opik.evaluation.models.litellm import opik_monitor as opik_litellm_monitor
-from .optimization_config.configs import TaskConfig, MetricConfig
+from .optimization_config import chat_prompt
 
 _limiter = _throttle.get_rate_limiter_for_current_opik_installation()
 
@@ -22,11 +22,13 @@ logger = logging.getLogger(__name__)
 
 
 class OptimizationRound(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+
     round_number: int
-    current_prompt: str
+    current_prompt: "chat_prompt.ChatPrompt"
     current_score: float
-    generated_prompts: List[Dict[str, Any]]
-    best_prompt: str
+    generated_prompts: Any
+    best_prompt: "chat_prompt.ChatPrompt"
     best_score: float
     improvement: float
 
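One note on the OptimizationRound change above: because the prompt fields now hold ChatPrompt objects rather than strings, the model declares model_config = {"arbitrary_types_allowed": True}. A minimal sketch of why, assuming ChatPrompt is an ordinary (non-pydantic) class, which this diff does not show:

from pydantic import BaseModel

class ChatPromptLike:  # stand-in for chat_prompt.ChatPrompt (assumption)
    pass

class Round(BaseModel):
    # Without arbitrary_types_allowed, pydantic v2 cannot build a schema for
    # ChatPromptLike and raises PydanticSchemaGenerationError at class definition.
    model_config = {"arbitrary_types_allowed": True}
    current_prompt: ChatPromptLike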
@@ -54,46 +56,37 @@ class BaseOptimizer:
         # Initialize shared cache
         initialize_cache()
 
+    @abstractmethod
     def optimize_prompt(
         self,
-        dataset: Union[str, opik.Dataset],
-        metric_config: MetricConfig,
-        task_config: TaskConfig,
-        prompt: str,
-        input_key: str,
-        output_key: str,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: opik.Dataset,
+        metrics: List[Callable],
         experiment_config: Optional[Dict] = None,
         **kwargs,
-    ):
+    ) -> optimization_result.OptimizationResult:
         """
         Optimize a prompt.
 
         Args:
             dataset: Opik dataset name, or Opik dataset
-            metric_config: instance of a MetricConfig
-            task_config: instance of a TaskConfig
+            metrics: A list of metric functions, these functions should have two arguments:
+                dataset_item and llm_output
             prompt: the prompt to optimize
             input_key: input field of dataset
             output_key: output field of dataset
             experiment_config: Optional configuration for the experiment
             **kwargs: Additional arguments for optimization
         """
-        self.dataset = dataset
-        self.metric = metric
-        self.prompt = prompt
-        self.input_key = input_key
-        self.output_key = output_key
-        self.experiment_config = experiment_config
+        pass
 
+    @abstractmethod
     def evaluate_prompt(
         self,
-        dataset: Union[str, opik.Dataset],
-        metric_config: MetricConfig,
-        prompt: str,
-        input_key: str,
-        output_key: str,
-        n_samples: int = 10,
-        task_config: Optional[TaskConfig] = None,
+        prompt: chat_prompt.ChatPrompt,
+        dataset: opik.Dataset,
+        metrics: List[Callable],
+        n_samples: Optional[int] = None,
         dataset_item_ids: Optional[List[str]] = None,
         experiment_config: Optional[Dict] = None,
         **kwargs,
@@ -102,12 +95,10 @@
         Evaluate a prompt.
 
         Args:
-            dataset: Opik dataset name, or Opik dataset
-            metric_config: instance of a MetricConfig
-            task_config: instance of a TaskConfig
             prompt: the prompt to evaluate
-            input_key: input field of dataset
-            output_key: output field of dataset
+            dataset: Opik dataset name, or Opik dataset
+            metrics: A list of metric functions, these functions should have two arguments:
+                dataset_item and llm_output
             n_samples: number of items to test in the dataset
             dataset_item_ids: Optional list of dataset item IDs to evaluate
             experiment_config: Optional configuration for the experiment
@@ -116,14 +107,7 @@
         Returns:
             float: The evaluation score
         """
-        self.dataset = dataset
-        self.metric_config = metric_config
-        self.task_config = task_config
-        self.prompt = prompt
-        self.input_key = input_key
-        self.output_key = output_key
-        self.experiment_config = experiment_config
-        return 0.0 # Base implementation returns 0
+        pass
 
     def get_history(self) -> List[Dict[str, Any]]:
         """
opik_optimizer/datasets/__init__.py
@@ -1,15 +1,14 @@
-from .hotpot_qa import hotpot_300, hotpot_500
-from .halu_eval import halu_eval_300
-from .tiny_test import tiny_test
-from .gsm8k import gsm8k
 from .ai2_arc import ai2_arc
-from .truthful_qa import truthful_qa
 from .cnn_dailymail import cnn_dailymail
-from .ragbench import ragbench_sentence_relevance
 from .election_questions import election_questions
+from .gsm8k import gsm8k
+from .halu_eval import halu_eval_300
+from .hotpot_qa import hotpot_300, hotpot_500
 from .medhallu import medhallu
 from .rag_hallucinations import rag_hallucinations
-
+from .ragbench import ragbench_sentence_relevance
+from .tiny_test import tiny_test
+from .truthful_qa import truthful_qa
 
 __all__ = [
     "hotpot_300",