opik-optimizer 0.8.1__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opik_optimizer-0.8.1/src/opik_optimizer.egg-info → opik_optimizer-0.9.0}/PKG-INFO +15 -31
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/README.md +14 -30
- opik_optimizer-0.9.0/pyproject.toml +3 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/setup.py +3 -3
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/__init__.py +15 -26
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/base_optimizer.py +28 -44
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/__init__.py +6 -7
- opik_optimizer-0.9.0/src/opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +1465 -0
- opik_optimizer-0.9.0/src/opik_optimizer/evolutionary_optimizer/reporting.py +246 -0
- opik_optimizer-0.9.0/src/opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +589 -0
- opik_optimizer-0.9.0/src/opik_optimizer/few_shot_bayesian_optimizer/reporting.py +119 -0
- opik_optimizer-0.9.0/src/opik_optimizer/meta_prompt_optimizer/__init__.py +5 -0
- opik_optimizer-0.9.0/src/opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +816 -0
- opik_optimizer-0.9.0/src/opik_optimizer/meta_prompt_optimizer/reporting.py +140 -0
- opik_optimizer-0.9.0/src/opik_optimizer/mipro_optimizer/__init__.py +1 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +12 -20
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/mipro_optimizer/mipro_optimizer.py +32 -52
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/mipro_optimizer/utils.py +1 -23
- opik_optimizer-0.9.0/src/opik_optimizer/optimization_config/chat_prompt.py +106 -0
- opik_optimizer-0.9.0/src/opik_optimizer/optimization_config/configs.py +16 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/optimization_config/mappers.py +1 -1
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/optimization_result.py +57 -85
- opik_optimizer-0.9.0/src/opik_optimizer/reporting_utils.py +180 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/task_evaluator.py +41 -26
- opik_optimizer-0.9.0/src/opik_optimizer/utils.py +264 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0/src/opik_optimizer.egg-info}/PKG-INFO +15 -31
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer.egg-info/SOURCES.txt +8 -4
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer.egg-info/requires.txt +1 -1
- opik_optimizer-0.8.1/src/opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +0 -1449
- opik_optimizer-0.8.1/src/opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +0 -485
- opik_optimizer-0.8.1/src/opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +0 -91
- opik_optimizer-0.8.1/src/opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +0 -80
- opik_optimizer-0.8.1/src/opik_optimizer/meta_prompt_optimizer.py +0 -1151
- opik_optimizer-0.8.1/src/opik_optimizer/mipro_optimizer/__init__.py +0 -1
- opik_optimizer-0.8.1/src/opik_optimizer/optimization_config/__init__.py +0 -0
- opik_optimizer-0.8.1/src/opik_optimizer/optimization_config/configs.py +0 -35
- opik_optimizer-0.8.1/src/opik_optimizer/utils.py +0 -80
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/LICENSE +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/setup.cfg +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/_throttle.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/cache_config.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/data/hotpot-500.json +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/ai2_arc.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/cnn_dailymail.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/election_questions.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/gsm8k.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/halu_eval.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/hotpot_qa.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/medhallu.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/rag_hallucinations.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/ragbench.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/tiny_test.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/datasets/truthful_qa.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/demo/__init__.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/demo/cache.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/demo/datasets.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/evolutionary_optimizer/__init__.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/few_shot_bayesian_optimizer/__init__.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/logging_config.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer/mipro_optimizer/_lm.py +0 -0
- {opik_optimizer-0.8.1/src/opik_optimizer/integrations → opik_optimizer-0.9.0/src/opik_optimizer/optimization_config}/__init__.py +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer.egg-info/dependency_links.txt +0 -0
- {opik_optimizer-0.8.1 → opik_optimizer-0.9.0}/src/opik_optimizer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: opik_optimizer
|
3
|
-
Version: 0.8.1
|
3
|
+
Version: 0.9.0
|
4
4
|
Summary: Agent optimization with Opik
|
5
5
|
Home-page: https://github.com/comet-ml/opik
|
6
6
|
Author: Comet ML
|
@@ -13,7 +13,7 @@ Requires-Python: >=3.9,<3.13
|
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
15
|
Requires-Dist: opik>=1.7.17
|
16
|
-
Requires-Dist: dspy>=2.6.18
|
16
|
+
Requires-Dist: dspy<=2.6.24,>=2.6.18
|
17
17
|
Requires-Dist: litellm
|
18
18
|
Requires-Dist: tqdm
|
19
19
|
Requires-Dist: datasets
|
@@ -113,24 +113,20 @@ Available sample datasets for testing:
|
|
113
113
|
|
114
114
|
```python
|
115
115
|
from opik.evaluation.metrics import LevenshteinRatio
|
116
|
-
from opik_optimizer import FewShotBayesianOptimizer
|
117
|
-
from opik_optimizer.
|
118
|
-
|
119
|
-
from opik_optimizer import (
|
120
|
-
MetricConfig,
|
121
|
-
TaskConfig,
|
122
|
-
from_dataset_field,
|
123
|
-
from_llm_response_text,
|
124
|
-
)
|
116
|
+
from opik_optimizer import FewShotBayesianOptimizer, ChatPrompt
|
117
|
+
from opik_optimizer.datasets import hotpot_300
|
125
118
|
|
126
119
|
# Load a sample dataset
|
127
|
-
hot_pot_dataset =
|
120
|
+
hot_pot_dataset = hotpot_300()
|
128
121
|
|
129
122
|
# Define the instruction for your chat prompt.
|
130
123
|
# Input parameters from dataset examples will be interpolated into the full prompt.
|
131
|
-
|
132
|
-
|
133
|
-
"""
|
124
|
+
prompt = ChatPrompt(
|
125
|
+
messages=[
|
126
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
127
|
+
{"role": "user", "content": "{question}"}
|
128
|
+
]
|
129
|
+
)
|
134
130
|
project_name = "optimize-few-shot-bayesian-hotpot" # For Comet logging
|
135
131
|
|
136
132
|
optimizer = FewShotBayesianOptimizer(
|
@@ -142,26 +138,14 @@ optimizer = FewShotBayesianOptimizer(
|
|
142
138
|
seed=42,
|
143
139
|
)
|
144
140
|
|
145
|
-
|
146
|
-
|
147
|
-
inputs={
|
148
|
-
"output": from_llm_response_text(), # Get output from LLM
|
149
|
-
"reference": from_dataset_field(name="answer"), # Get reference from dataset
|
150
|
-
},
|
151
|
-
)
|
152
|
-
|
153
|
-
task_config = TaskConfig(
|
154
|
-
instruction_prompt=prompt_instruction,
|
155
|
-
input_dataset_fields=["question"], # Fields from dataset to use as input
|
156
|
-
output_dataset_field="answer", # Field in dataset for reference answer
|
157
|
-
use_chat_prompt=True, # Use chat-style prompting
|
158
|
-
)
|
141
|
+
def levenshtein_ratio(dataset_item, llm_output):
|
142
|
+
return LevenshteinRatio().score(reference=dataset_item["answer"], output=llm_output)
|
159
143
|
|
160
144
|
# Run the optimization
|
161
145
|
result = optimizer.optimize_prompt(
|
146
|
+
prompt=prompt,
|
162
147
|
dataset=hot_pot_dataset,
|
163
|
-
|
164
|
-
task_config=task_config,
|
148
|
+
metric=levenshtein_ratio,
|
165
149
|
n_trials=10, # Number of optimization trials
|
166
150
|
n_samples=150, # Number of dataset samples for evaluation per trial
|
167
151
|
)
|
@@ -73,24 +73,20 @@ Available sample datasets for testing:
|
|
73
73
|
|
74
74
|
```python
|
75
75
|
from opik.evaluation.metrics import LevenshteinRatio
|
76
|
-
from opik_optimizer import FewShotBayesianOptimizer
|
77
|
-
from opik_optimizer.
|
78
|
-
|
79
|
-
from opik_optimizer import (
|
80
|
-
MetricConfig,
|
81
|
-
TaskConfig,
|
82
|
-
from_dataset_field,
|
83
|
-
from_llm_response_text,
|
84
|
-
)
|
76
|
+
from opik_optimizer import FewShotBayesianOptimizer, ChatPrompt
|
77
|
+
from opik_optimizer.datasets import hotpot_300
|
85
78
|
|
86
79
|
# Load a sample dataset
|
87
|
-
hot_pot_dataset =
|
80
|
+
hot_pot_dataset = hotpot_300()
|
88
81
|
|
89
82
|
# Define the instruction for your chat prompt.
|
90
83
|
# Input parameters from dataset examples will be interpolated into the full prompt.
|
91
|
-
|
92
|
-
|
93
|
-
"""
|
84
|
+
prompt = ChatPrompt(
|
85
|
+
messages=[
|
86
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
87
|
+
{"role": "user", "content": "{question}"}
|
88
|
+
]
|
89
|
+
)
|
94
90
|
project_name = "optimize-few-shot-bayesian-hotpot" # For Comet logging
|
95
91
|
|
96
92
|
optimizer = FewShotBayesianOptimizer(
|
@@ -102,26 +98,14 @@ optimizer = FewShotBayesianOptimizer(
|
|
102
98
|
seed=42,
|
103
99
|
)
|
104
100
|
|
105
|
-
|
106
|
-
|
107
|
-
inputs={
|
108
|
-
"output": from_llm_response_text(), # Get output from LLM
|
109
|
-
"reference": from_dataset_field(name="answer"), # Get reference from dataset
|
110
|
-
},
|
111
|
-
)
|
112
|
-
|
113
|
-
task_config = TaskConfig(
|
114
|
-
instruction_prompt=prompt_instruction,
|
115
|
-
input_dataset_fields=["question"], # Fields from dataset to use as input
|
116
|
-
output_dataset_field="answer", # Field in dataset for reference answer
|
117
|
-
use_chat_prompt=True, # Use chat-style prompting
|
118
|
-
)
|
101
|
+
def levenshtein_ratio(dataset_item, llm_output):
|
102
|
+
return LevenshteinRatio().score(reference=dataset_item["answer"], output=llm_output)
|
119
103
|
|
120
104
|
# Run the optimization
|
121
105
|
result = optimizer.optimize_prompt(
|
106
|
+
prompt=prompt,
|
122
107
|
dataset=hot_pot_dataset,
|
123
|
-
|
124
|
-
task_config=task_config,
|
108
|
+
metric=levenshtein_ratio,
|
125
109
|
n_trials=10, # Number of optimization trials
|
126
110
|
n_samples=150, # Number of dataset samples for evaluation per trial
|
127
111
|
)
|
@@ -153,4 +137,4 @@ To contribute or use the Opik Optimizer from source:
|
|
153
137
|
|
154
138
|
- Python `>=3.9,<3.13`
|
155
139
|
- Opik API key (recommended for full functionality, configure via `opik configure`)
|
156
|
-
- API key for your chosen LLM provider (e.g., OpenAI, Anthropic, Gemini), configured as per LiteLLM guidelines.
|
140
|
+
- API key for your chosen LLM provider (e.g., OpenAI, Anthropic, Gemini), configured as per LiteLLM guidelines.
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from setuptools import
|
1
|
+
from setuptools import find_packages, setup
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="opik_optimizer",
|
5
|
-
version="0.8.1",
|
5
|
+
version="0.9.0",
|
6
6
|
description="Agent optimization with Opik",
|
7
7
|
author="Comet ML",
|
8
8
|
author_email="support@comet.com",
|
@@ -17,7 +17,7 @@ setup(
|
|
17
17
|
python_requires=">=3.9,<3.13",
|
18
18
|
install_requires=[
|
19
19
|
"opik>=1.7.17",
|
20
|
-
"dspy>=2.6.18",
|
20
|
+
"dspy>=2.6.18,<=2.6.24",
|
21
21
|
"litellm",
|
22
22
|
"tqdm",
|
23
23
|
"datasets",
|
@@ -1,47 +1,36 @@
|
|
1
1
|
import importlib.metadata
|
2
2
|
import logging
|
3
|
-
from .logging_config import setup_logging
|
4
3
|
|
5
|
-
|
4
|
+
from opik.evaluation.models.litellm import warning_filters
|
6
5
|
|
7
|
-
|
8
|
-
setup_logging(level=logging.WARNING)
|
6
|
+
from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
|
9
7
|
|
10
|
-
|
11
|
-
from .mipro_optimizer import MiproOptimizer
|
8
|
+
from . import datasets
|
12
9
|
from .base_optimizer import BaseOptimizer
|
13
|
-
from .meta_prompt_optimizer import MetaPromptOptimizer
|
14
10
|
from .few_shot_bayesian_optimizer import FewShotBayesianOptimizer
|
15
|
-
from .
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
from .
|
21
|
-
from_dataset_field,
|
22
|
-
from_llm_response_text,
|
23
|
-
)
|
11
|
+
from .logging_config import setup_logging
|
12
|
+
from .meta_prompt_optimizer import MetaPromptOptimizer
|
13
|
+
from .mipro_optimizer import MiproOptimizer
|
14
|
+
from .optimization_config.chat_prompt import ChatPrompt
|
15
|
+
from .optimization_config.configs import TaskConfig
|
16
|
+
from .optimization_result import OptimizationResult
|
24
17
|
|
25
|
-
|
26
|
-
from . import datasets
|
18
|
+
__version__ = importlib.metadata.version("opik_optimizer")
|
27
19
|
|
28
|
-
|
20
|
+
# Using WARNING as a sensible default to avoid flooding users with INFO/DEBUG
|
21
|
+
setup_logging(level=logging.WARNING)
|
29
22
|
|
30
|
-
|
31
|
-
from opik_optimizer.evolutionary_optimizer.evolutionary_optimizer import EvolutionaryOptimizer
|
23
|
+
warning_filters.add_warning_filters()
|
32
24
|
|
33
25
|
__all__ = [
|
34
26
|
"BaseOptimizer",
|
27
|
+
"ChatPrompt",
|
35
28
|
"FewShotBayesianOptimizer",
|
36
29
|
"MetaPromptOptimizer",
|
37
30
|
"MiproOptimizer",
|
38
31
|
"EvolutionaryOptimizer",
|
39
|
-
"MetricConfig",
|
40
|
-
"OptimizationConfig",
|
41
|
-
"TaskConfig",
|
42
|
-
"from_dataset_field",
|
43
|
-
"from_llm_response_text",
|
44
32
|
"OptimizationResult",
|
45
33
|
"setup_logging",
|
46
34
|
"datasets",
|
35
|
+
"TaskConfig"
|
47
36
|
]
|
@@ -1,16 +1,16 @@
|
|
1
|
-
from typing import Optional, Union, List, Dict, Any
|
2
|
-
import opik
|
3
1
|
import logging
|
4
2
|
import time
|
3
|
+
from abc import abstractmethod
|
4
|
+
from typing import Any, Callable, Dict, List, Optional
|
5
5
|
|
6
6
|
import litellm
|
7
|
-
|
7
|
+
import opik
|
8
8
|
from opik.rest_api.core import ApiError
|
9
|
-
|
10
9
|
from pydantic import BaseModel
|
10
|
+
|
11
|
+
from . import _throttle, optimization_result
|
11
12
|
from .cache_config import initialize_cache
|
12
|
-
from
|
13
|
-
from .optimization_config.configs import TaskConfig, MetricConfig
|
13
|
+
from .optimization_config import chat_prompt
|
14
14
|
|
15
15
|
_limiter = _throttle.get_rate_limiter_for_current_opik_installation()
|
16
16
|
|
@@ -22,11 +22,13 @@ logger = logging.getLogger(__name__)
|
|
22
22
|
|
23
23
|
|
24
24
|
class OptimizationRound(BaseModel):
|
25
|
+
model_config = {"arbitrary_types_allowed": True}
|
26
|
+
|
25
27
|
round_number: int
|
26
|
-
current_prompt:
|
28
|
+
current_prompt: "chat_prompt.ChatPrompt"
|
27
29
|
current_score: float
|
28
|
-
generated_prompts:
|
29
|
-
best_prompt:
|
30
|
+
generated_prompts: Any
|
31
|
+
best_prompt: "chat_prompt.ChatPrompt"
|
30
32
|
best_score: float
|
31
33
|
improvement: float
|
32
34
|
|
@@ -54,46 +56,37 @@ class BaseOptimizer:
|
|
54
56
|
# Initialize shared cache
|
55
57
|
initialize_cache()
|
56
58
|
|
59
|
+
@abstractmethod
|
57
60
|
def optimize_prompt(
|
58
61
|
self,
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
prompt: str,
|
63
|
-
input_key: str,
|
64
|
-
output_key: str,
|
62
|
+
prompt: chat_prompt.ChatPrompt,
|
63
|
+
dataset: opik.Dataset,
|
64
|
+
metrics: List[Callable],
|
65
65
|
experiment_config: Optional[Dict] = None,
|
66
66
|
**kwargs,
|
67
|
-
):
|
67
|
+
) -> optimization_result.OptimizationResult:
|
68
68
|
"""
|
69
69
|
Optimize a prompt.
|
70
70
|
|
71
71
|
Args:
|
72
72
|
dataset: Opik dataset name, or Opik dataset
|
73
|
-
|
74
|
-
|
73
|
+
metrics: A list of metric functions, these functions should have two arguments:
|
74
|
+
dataset_item and llm_output
|
75
75
|
prompt: the prompt to optimize
|
76
76
|
input_key: input field of dataset
|
77
77
|
output_key: output field of dataset
|
78
78
|
experiment_config: Optional configuration for the experiment
|
79
79
|
**kwargs: Additional arguments for optimization
|
80
80
|
"""
|
81
|
-
|
82
|
-
self.metric = metric
|
83
|
-
self.prompt = prompt
|
84
|
-
self.input_key = input_key
|
85
|
-
self.output_key = output_key
|
86
|
-
self.experiment_config = experiment_config
|
81
|
+
pass
|
87
82
|
|
83
|
+
@abstractmethod
|
88
84
|
def evaluate_prompt(
|
89
85
|
self,
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
output_key: str,
|
95
|
-
n_samples: int = 10,
|
96
|
-
task_config: Optional[TaskConfig] = None,
|
86
|
+
prompt: chat_prompt.ChatPrompt,
|
87
|
+
dataset: opik.Dataset,
|
88
|
+
metrics: List[Callable],
|
89
|
+
n_samples: Optional[int] = None,
|
97
90
|
dataset_item_ids: Optional[List[str]] = None,
|
98
91
|
experiment_config: Optional[Dict] = None,
|
99
92
|
**kwargs,
|
@@ -102,12 +95,10 @@ class BaseOptimizer:
|
|
102
95
|
Evaluate a prompt.
|
103
96
|
|
104
97
|
Args:
|
105
|
-
dataset: Opik dataset name, or Opik dataset
|
106
|
-
metric_config: instance of a MetricConfig
|
107
|
-
task_config: instance of a TaskConfig
|
108
98
|
prompt: the prompt to evaluate
|
109
|
-
|
110
|
-
|
99
|
+
dataset: Opik dataset name, or Opik dataset
|
100
|
+
metrics: A list of metric functions, these functions should have two arguments:
|
101
|
+
dataset_item and llm_output
|
111
102
|
n_samples: number of items to test in the dataset
|
112
103
|
dataset_item_ids: Optional list of dataset item IDs to evaluate
|
113
104
|
experiment_config: Optional configuration for the experiment
|
@@ -116,14 +107,7 @@ class BaseOptimizer:
|
|
116
107
|
Returns:
|
117
108
|
float: The evaluation score
|
118
109
|
"""
|
119
|
-
|
120
|
-
self.metric_config = metric_config
|
121
|
-
self.task_config = task_config
|
122
|
-
self.prompt = prompt
|
123
|
-
self.input_key = input_key
|
124
|
-
self.output_key = output_key
|
125
|
-
self.experiment_config = experiment_config
|
126
|
-
return 0.0 # Base implementation returns 0
|
110
|
+
pass
|
127
111
|
|
128
112
|
def get_history(self) -> List[Dict[str, Any]]:
|
129
113
|
"""
|
@@ -1,15 +1,14 @@
|
|
1
|
-
from .hotpot_qa import hotpot_300, hotpot_500
|
2
|
-
from .halu_eval import halu_eval_300
|
3
|
-
from .tiny_test import tiny_test
|
4
|
-
from .gsm8k import gsm8k
|
5
1
|
from .ai2_arc import ai2_arc
|
6
|
-
from .truthful_qa import truthful_qa
|
7
2
|
from .cnn_dailymail import cnn_dailymail
|
8
|
-
from .ragbench import ragbench_sentence_relevance
|
9
3
|
from .election_questions import election_questions
|
4
|
+
from .gsm8k import gsm8k
|
5
|
+
from .halu_eval import halu_eval_300
|
6
|
+
from .hotpot_qa import hotpot_300, hotpot_500
|
10
7
|
from .medhallu import medhallu
|
11
8
|
from .rag_hallucinations import rag_hallucinations
|
12
|
-
|
9
|
+
from .ragbench import ragbench_sentence_relevance
|
10
|
+
from .tiny_test import tiny_test
|
11
|
+
from .truthful_qa import truthful_qa
|
13
12
|
|
14
13
|
__all__ = [
|
15
14
|
"hotpot_300",
|