opik-optimizer 0.8.1__py3-none-any.whl → 0.9.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +15 -26
- opik_optimizer/base_optimizer.py +28 -44
- opik_optimizer/datasets/__init__.py +6 -7
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +742 -726
- opik_optimizer/evolutionary_optimizer/reporting.py +246 -0
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +296 -194
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +119 -0
- opik_optimizer/meta_prompt_optimizer/__init__.py +5 -0
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +816 -0
- opik_optimizer/meta_prompt_optimizer/reporting.py +140 -0
- opik_optimizer/mipro_optimizer/__init__.py +1 -1
- opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +12 -20
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +32 -52
- opik_optimizer/mipro_optimizer/utils.py +1 -23
- opik_optimizer/optimization_config/chat_prompt.py +106 -0
- opik_optimizer/optimization_config/configs.py +2 -21
- opik_optimizer/optimization_config/mappers.py +1 -1
- opik_optimizer/optimization_result.py +57 -85
- opik_optimizer/reporting_utils.py +180 -0
- opik_optimizer/task_evaluator.py +33 -25
- opik_optimizer/utils.py +187 -3
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/METADATA +15 -31
- opik_optimizer-0.9.0rc0.dist-info/RECORD +48 -0
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/WHEEL +1 -1
- opik_optimizer/few_shot_bayesian_optimizer/prompt_parameter.py +0 -91
- opik_optimizer/few_shot_bayesian_optimizer/prompt_templates.py +0 -80
- opik_optimizer/integrations/__init__.py +0 -0
- opik_optimizer/meta_prompt_optimizer.py +0 -1151
- opik_optimizer-0.8.1.dist-info/RECORD +0 -45
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-0.8.1.dist-info → opik_optimizer-0.9.0rc0.dist-info}/top_level.txt +0 -0
opik_optimizer/optimization_result.py
CHANGED
@@ -1,47 +1,36 @@
 """Module containing the OptimizationResult class."""
 
-from typing import Dict, List,
+from typing import Any, Dict, List, Literal, Optional
+
 import pydantic
-from opik.evaluation.metrics import BaseMetric
-from pydantic import BaseModel, Field
-from .base_optimizer import OptimizationRound  # Adjust import as necessary
 import rich
 
-
-    """Represents a single step or trial in an optimization process."""
-    step: int
-    score: Optional[float] = None
-    prompt: Optional[Union[str, List[Dict[str, str]]]] = None
-    parameters: Optional[Dict[str, Any]] = None
-    timestamp: Optional[str] = None
-    # Add other relevant details per step if needed
+from .reporting_utils import get_console
 
 
 class OptimizationResult(pydantic.BaseModel):
-    """Result
+    """Result of an optimization run."""
 
-
+    optimizer: str = "Optimizer"
+
+    prompt: List[Dict[Literal["role", "content"], str]]
     score: float
     metric_name: str
-
-
-    )  # Default empty dict
-    details: Dict[str, Any] = pydantic.Field(default_factory=dict)  # Default empty dict
-    best_prompt: Optional[str] = None
-    best_score: Optional[float] = None
-    best_metric_name: Optional[str] = None
-    best_details: Optional[Dict[str, Any]] = None
-    all_results: Optional[List[Dict[str, Any]]] = None
+
+    details: Dict[str, Any] = pydantic.Field(default_factory=dict)
     history: List[Dict[str, Any]] = []
-    metric: Optional[BaseMetric] = None
-    demonstrations: Optional[List[Dict[str, Any]]] = None
-    optimizer: str = "Optimizer"
-    tool_prompts: Optional[Dict[str, str]] = None
-    opik_metadata: Optional[Dict[str, Any]] = None
     llm_calls: Optional[int] = None
 
+    # MIPRO specific
+    demonstrations: Optional[List[Dict[str, Any]]] = None
+    mipro_prompt: Optional[str] = None
+    tool_prompts: Optional[Dict[str, str]] = None
+
     model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
 
+    def model_dump(self, *kargs, **kwargs) -> Dict[str, Any]:
+        return super().model_dump(*kargs, **kwargs)
+
     def _calculate_improvement_str(self) -> str:
         """Helper to calculate improvement percentage string."""
         initial_s = self.details.get("initial_score")
@@ -91,24 +80,19 @@ class OptimizationResult(pydantic.BaseModel):
         temp = self.details.get("temperature")
         temp_str = f"{temp:.1f}" if isinstance(temp, (int, float)) else "N/A"
 
-
-
-
-
-
-
-
-
-
-            ]
-        )
-            final_prompt_display = f"Instruction:\n {self.prompt}\nFew-Shot Examples (Chat Structure):\n{chat_display}"
-        except Exception:
-            pass
+        try:
+            final_prompt_display = "\n".join(
+                [
+                    f" {msg.get('role', 'unknown')}: {str(msg.get('content', ''))[:150]}..."
+                    for msg in self.prompt
+                ]
+            )
+        except Exception:
+            final_prompt_display = str(self.prompt)
 
         output = [
             f"\n{separator}",
-
+            "OPTIMIZATION COMPLETE",
             f"{separator}",
             f"Optimizer: {self.optimizer}",
             f"Model Used: {model_name} (Temp: {temp_str})",
@@ -118,10 +102,10 @@
             f"Total Improvement:{improvement_str.rjust(max(0, 18 - len('Total Improvement:')))}",
             f"Rounds Completed: {rounds_ran}",
             f"Stopped Early: {stopped_early}",
-
-
+            "\nFINAL OPTIMIZED PROMPT / STRUCTURE:",
+            "--------------------------------------------------------------------------------",
             f"{final_prompt_display}",
-
+            "--------------------------------------------------------------------------------",
             f"{separator}",
         ]
         return "\n".join(output)
@@ -160,43 +144,33 @@ class OptimizationResult(pydantic.BaseModel):
         table.add_row("Stopped Early:", str(stopped_early))
 
         # Display Chat Structure if available
-
-
-
-
-
-
-
-
-
-
-
-
-
-            for msg in self.details["chat_messages"]:
-                role = msg.get("role", "unknown")
-                content = str(msg.get("content", ""))
-                role_style = (
-                    "bold green"
-                    if role == "user"
-                    else (
-                        "bold blue"
-                        if role == "assistant"
-                        else ("bold magenta" if role == "system" else "")
-                    )
-                )
-                chat_group_items.append(
-                    f"[{role_style}]{role.capitalize()}:[/] {content}"
+        panel_title = "[bold]Final Optimized Prompt[/bold]"
+        try:
+            chat_group_items = []
+            for msg in self.prompt:
+                role = msg.get("role", "unknown")
+                content = str(msg.get("content", ""))
+                role_style = (
+                    "bold green"
+                    if role == "user"
+                    else (
+                        "bold blue"
+                        if role == "assistant"
+                        else ("bold magenta" if role == "system" else "")
                     )
-                chat_group_items.append("---")  # Separator
-            prompt_renderable = rich.console.Group(*chat_group_items)
-
-        except Exception:
-            # Fallback to simple text prompt
-            prompt_renderable = rich.text.Text(self.prompt or "", overflow="fold")
-            panel_title = (
-                "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
                 )
+                chat_group_items.append(
+                    f"[{role_style}]{role.capitalize()}:[/] {content}"
+                )
+                chat_group_items.append("---")  # Separator
+            prompt_renderable = rich.console.Group(*chat_group_items)
+
+        except Exception:
+            # Fallback to simple text prompt
+            prompt_renderable = rich.text.Text(str(self.prompt or ""), overflow="fold")
+            panel_title = (
+                "[bold]Final Optimized Prompt (Instruction - fallback)[/bold]"
+            )
 
         prompt_panel = rich.panel.Panel(
             prompt_renderable, title=panel_title, border_style="blue", padding=(1, 2)
@@ -212,11 +186,9 @@ class OptimizationResult(pydantic.BaseModel):
             padding=1,
         )
 
-    def model_dump(self) -> Dict[str, Any]:
-        return super().model_dump()
-
     def display(self) -> None:
         """
         Displays the OptimizationResult using rich formatting
         """
-
+        console = get_console()
+        console.print(self)
opik_optimizer/reporting_utils.py
ADDED
@@ -0,0 +1,180 @@
+import logging
+from contextlib import contextmanager
+from typing import Dict, List, Optional
+
+import rich
+from rich import box
+from rich.console import Console, Group
+from rich.panel import Panel
+from rich.progress import track
+from rich.text import Text
+
+PANEL_WIDTH = 70
+
+def get_console(*args, **kwargs):
+    console = Console(*args, **kwargs)
+    console.is_jupyter = False
+    return console
+
+@contextmanager
+def convert_tqdm_to_rich(description: Optional[str] = None, verbose: int = 1):
+    """Context manager to convert tqdm to rich."""
+    import opik.evaluation.engine.evaluation_tasks_executor
+
+    optimizer_logger = logging.getLogger('opik_optimizer')
+
+    def _tqdm_to_track(iterable, desc, disable, total):
+        disable = verbose == 0 or optimizer_logger.level > logging.INFO
+        return track(
+            iterable,
+            description=description or desc,
+            disable=disable,
+            total=total
+        )
+
+    original__tqdm = opik.evaluation.engine.evaluation_tasks_executor._tqdm
+    opik.evaluation.engine.evaluation_tasks_executor._tqdm = _tqdm_to_track
+
+
+    from opik.evaluation import report
+    report.display_experiment_results = lambda *args, **kwargs: None
+    report.display_experiment_link = lambda *args, **kwargs: None
+
+    try:
+        yield
+    finally:
+        opik.evaluation.engine.evaluation_tasks_executor._tqdm = original__tqdm
+
+
+
+@contextmanager
+def suppress_opik_logs():
+    """Suppress Opik startup logs by temporarily increasing the log level."""
+    # Optimizer log level
+    optimizer_logger = logging.getLogger('opik_optimizer')
+
+    # Get the Opik logger
+    opik_logger = logging.getLogger("opik.api_objects.opik_client")
+
+    # Store original log level
+    original_level = opik_logger.level
+
+    # Set log level to ERROR to suppress INFO messages
+    opik_logger.setLevel(optimizer_logger.level)
+
+    try:
+        yield
+    finally:
+        # Restore original log level
+        opik_logger.setLevel(original_level)
+
+def display_messages(messages: List[Dict[str, str]], prefix: str = ""):
+    for i, msg in enumerate(messages):
+        panel = Panel(
+            Text(msg.get('content', ''), overflow="fold"),
+            title=f"{msg.get('role', 'message')}",
+            title_align="left",
+            border_style="dim",
+            width=PANEL_WIDTH,
+            padding=(1, 2),
+        )
+
+        # Capture the panel as rendered text with ANSI styles
+        console = get_console()
+        with console.capture() as capture:
+            console.print(panel)
+
+        # Retrieve the rendered string (with ANSI)
+        rendered_panel = capture.get()
+
+        # Prefix each line with '| ', preserving ANSI styles
+        for line in rendered_panel.splitlines():
+            console.print(Text(prefix) + Text.from_ansi(line))
+
+def display_header(algorithm: str, verbose: int = 1):
+    if verbose < 1:
+        return
+
+    content = Text.assemble(
+        ("● ", "green"),
+        "Running Opik Evaluation - ",
+        (algorithm, "blue")
+    )
+
+    panel = Panel(
+        content,
+        box=box.ROUNDED,
+        width=PANEL_WIDTH
+    )
+
+    console = get_console()
+    console.print(panel)
+    console.print("\n")
+
+
+def display_result(initial_score, best_score, best_prompt, verbose: int = 1):
+    if verbose < 1:
+        return
+
+    console = get_console()
+    console.print(Text("\n> Optimization complete\n"))
+
+    if best_score > initial_score:
+        if initial_score == 0:
+            content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f}", style="bold green")]
+        else:
+            perc_change = (best_score - initial_score) / initial_score
+            content = [Text(f"Prompt was optimized and improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})", style="bold green")]
+    else:
+        content = [Text("Optimization trial did not find a better prompt than the initial one.", style="bold red")]
+
+    content.append(Text("\nOptimized prompt:"))
+    for i, msg in enumerate(best_prompt):
+        content.append(
+            Panel(
+                Text(msg.get('content', ''), overflow="fold"),
+                title=f"{msg.get('role', 'message')}",
+                title_align="left",
+                border_style="dim",
+                width=PANEL_WIDTH,
+                padding=(1, 2),
+            )
+        )
+
+    console.print(
+        Panel(
+            Group(*content),
+            title="Optimization results",
+            title_align="left",
+            border_style="green",
+            width=PANEL_WIDTH,
+            padding=(1, 2)
+        )
+    )
+
+
+def display_configuration(messages: List[Dict[str, str]], optimizer_config: Dict[str, str], verbose: int = 1):
+    """Displays the LLM messages and optimizer configuration using Rich panels."""
+
+    if verbose < 1:
+        return
+
+    # Panel for Optimizer configuration
+    console = get_console()
+    console.print(Text("> Let's optimize the prompt:\n"))
+
+    display_messages(messages)
+
+    # Panel for configuration
+    console.print(Text(f"\nUsing {optimizer_config['optimizer']} with the parameters: "))
+
+    for key, value in optimizer_config.items():
+        if key == "optimizer":  # Already displayed in the introductory text
+            continue
+        parameter_text = Text.assemble(
+            Text(f" - {key}: ", style="dim"),
+            Text(str(value), style="cyan")
+        )
+        console.print(parameter_text)
+
+    console.print("\n")
opik_optimizer/task_evaluator.py
CHANGED
@@ -1,17 +1,40 @@
-import opik
 import logging
 from typing import Any, Callable, Dict, List, Optional
-from opik_optimizer.optimization_config.configs import MetricConfig
-from opik.evaluation.metrics import score_result
 
+import opik
 from opik.evaluation import evaluator as opik_evaluator
+from opik.evaluation.metrics import base_metric, score_result
 
 logger = logging.getLogger(__name__)
 
+def _create_metric_class(metric: Callable):
+    class MetricClass(base_metric.BaseMetric):
+        def __init__(self):
+            self.name = metric.__name__
+
+        def score(self, llm_output, **kwargs) -> score_result.ScoreResult:
+            try:
+                metric_val = metric(dataset_item=kwargs, llm_output=llm_output)
+                if isinstance(metric_val , score_result.ScoreResult):
+                    return metric_val
+                else:
+                    return score_result.ScoreResult(
+                        name = self.name,
+                        value = metric_val
+                    )
+            except Exception:
+                return score_result.ScoreResult(
+                    name = self.name,
+                    value = 0,
+                    scoring_failed=True
+                )
+
+    return MetricClass()
+
 def evaluate(
     dataset: opik.Dataset,
     evaluated_task: Callable[[Dict[str, Any]], Dict[str, Any]],
-
+    metric: Callable,
     num_threads: int,
     optimization_id: Optional[str] = None,
     dataset_item_ids: Optional[List[str]] = None,
@@ -25,7 +48,8 @@ def evaluate(
 
     Args:
         dataset: A list of dictionaries representing the dataset.
-
+        metric: A metric function, this function should have two arguments:
+            dataset_item and llm_output
        evaluated_task: A function that takes a dataset item dict as input and returns a dictionary with output(s).
        dataset_item_ids: Optional list of dataset item IDs to evaluate.
        project_name: Optional project name for evaluation.
@@ -38,7 +62,7 @@ def evaluate(
     Returns:
         float: The average score of the evaluated task.
     """
-    items = dataset.get_items(
+    items = dataset.get_items(n_samples)
     if not items:
         print("[DEBUG] Empty dataset, returning 0.0")
         return 0.0
@@ -46,21 +70,7 @@ def evaluate(
     if dataset_item_ids:
         items = [item for item in items if item.get("id") in dataset_item_ids]
 
-
-    items = items[:n_samples]
-
-    # TODO: move to debug logger
-    # print(f"[DEBUG] Starting evaluation with task: {evaluated_task}")
-    # print(f"[DEBUG] Items to evaluate: {items}")
-    # print(f"[DEBUG] Metric config inputs: {metric_config.inputs}")
-    # print(f"[DEBUG] Number of threads: {num_threads}")
-    # print(f"[DEBUG] Project name: {project_name}")
-
-    scoring_key_mapping = {
-        key: value if isinstance(value, str) else value.__name__
-        for key, value in metric_config.inputs.items()
-    }
-    scoring_key_mapping["output"] = "_llm_task_output"
+    eval_metrics = [_create_metric_class(metric)]
 
     if optimization_id is not None:
         result = opik_evaluator.evaluate_optimization_trial(
@@ -68,9 +78,8 @@
             dataset=dataset,
             task=evaluated_task,
             project_name=project_name,
-            scoring_key_mapping=scoring_key_mapping,
             dataset_item_ids=dataset_item_ids,
-            scoring_metrics=
+            scoring_metrics=eval_metrics,
             task_threads=num_threads,
             nb_samples=n_samples,
             experiment_config=experiment_config,
@@ -81,9 +90,8 @@
             dataset=dataset,
             task=evaluated_task,
             project_name=project_name,
-            scoring_key_mapping=scoring_key_mapping,
             dataset_item_ids=dataset_item_ids,
-            scoring_metrics=
+            scoring_metrics=eval_metrics,
             task_threads=num_threads,
             nb_samples=n_samples,
             experiment_config=experiment_config,