inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/images.py
CHANGED
@@ -1,66 +1,69 @@
|
|
1
1
|
import asyncio
|
2
2
|
|
3
3
|
from inspect_ai._util.constants import BASE_64_DATA_REMOVED
|
4
|
-
from inspect_ai._util.
|
4
|
+
from inspect_ai._util.content import Content, ContentAudio, ContentImage, ContentVideo
|
5
|
+
from inspect_ai._util.images import file_as_data_uri
|
5
6
|
from inspect_ai._util.url import is_data_uri
|
6
7
|
from inspect_ai.dataset import Sample
|
7
|
-
from inspect_ai.model import ChatMessage, ChatMessageUser
|
8
|
+
from inspect_ai.model import ChatMessage, ChatMessageUser
|
8
9
|
from inspect_ai.solver import TaskState
|
9
10
|
|
10
11
|
|
11
|
-
async def
|
12
|
-
return await asyncio.gather(*[
|
12
|
+
async def states_with_base64_content(states: list[TaskState]) -> list[TaskState]:
|
13
|
+
return await asyncio.gather(*[state_with_base64_content(state) for state in states])
|
13
14
|
|
14
15
|
|
15
|
-
async def
|
16
|
-
state.messages = await
|
16
|
+
async def state_with_base64_content(state: TaskState) -> TaskState:
|
17
|
+
state.messages = await messages_with_base64_content(state.messages)
|
17
18
|
return state
|
18
19
|
|
19
20
|
|
20
|
-
def
|
21
|
-
state.messages =
|
21
|
+
def state_without_base64_content(state: TaskState) -> TaskState:
|
22
|
+
state.messages = messages_without_base64_content(state.messages)
|
22
23
|
return state
|
23
24
|
|
24
25
|
|
25
|
-
async def
|
26
|
+
async def samples_with_base64_content(samples: list[Sample]) -> list[Sample]:
|
26
27
|
return await asyncio.gather(
|
27
|
-
*[
|
28
|
+
*[sample_with_base64_content(sample) for sample in samples]
|
28
29
|
)
|
29
30
|
|
30
31
|
|
31
|
-
async def
|
32
|
+
async def sample_with_base64_content(sample: Sample) -> Sample:
|
32
33
|
if isinstance(sample.input, list):
|
33
34
|
return sample.model_copy(
|
34
|
-
update={"input": await
|
35
|
+
update={"input": await messages_with_base64_content(sample.input)}
|
35
36
|
)
|
36
37
|
else:
|
37
38
|
return sample
|
38
39
|
|
39
40
|
|
40
|
-
def
|
41
|
+
def sample_without_base64_content(sample: Sample) -> Sample:
|
41
42
|
if isinstance(sample.input, list):
|
42
43
|
return sample.model_copy(
|
43
|
-
update={"input":
|
44
|
+
update={"input": messages_without_base64_content(sample.input)}
|
44
45
|
)
|
45
46
|
else:
|
46
47
|
return sample
|
47
48
|
|
48
49
|
|
49
|
-
async def
|
50
|
+
async def messages_with_base64_content(
|
51
|
+
messages: list[ChatMessage],
|
52
|
+
) -> list[ChatMessage]:
|
50
53
|
return await asyncio.gather(
|
51
|
-
*[
|
54
|
+
*[message_with_base64_content(message) for message in messages]
|
52
55
|
)
|
53
56
|
|
54
57
|
|
55
|
-
def
|
56
|
-
return [
|
58
|
+
def messages_without_base64_content(messages: list[ChatMessage]) -> list[ChatMessage]:
|
59
|
+
return [message_without_base64_content(message) for message in messages]
|
57
60
|
|
58
61
|
|
59
|
-
async def
|
62
|
+
async def message_with_base64_content(message: ChatMessage) -> ChatMessage:
|
60
63
|
if isinstance(message, ChatMessageUser) and not isinstance(message.content, str):
|
61
64
|
return ChatMessageUser(
|
62
65
|
content=[
|
63
|
-
await
|
66
|
+
await chat_content_with_base64_content(content)
|
64
67
|
for content in message.content
|
65
68
|
],
|
66
69
|
source=message.source,
|
@@ -69,11 +72,11 @@ async def message_with_base64_image(message: ChatMessage) -> ChatMessage:
|
|
69
72
|
return message
|
70
73
|
|
71
74
|
|
72
|
-
def
|
75
|
+
def message_without_base64_content(message: ChatMessage) -> ChatMessage:
|
73
76
|
if isinstance(message, ChatMessageUser) and not isinstance(message.content, str):
|
74
77
|
return ChatMessageUser(
|
75
78
|
content=[
|
76
|
-
|
79
|
+
chat_content_without_base64_content(content)
|
77
80
|
for content in message.content
|
78
81
|
],
|
79
82
|
source=message.source,
|
@@ -82,18 +85,30 @@ def message_without_base64_image(message: ChatMessage) -> ChatMessage:
|
|
82
85
|
return message
|
83
86
|
|
84
87
|
|
85
|
-
async def
|
88
|
+
async def chat_content_with_base64_content(content: Content) -> Content:
|
86
89
|
if isinstance(content, ContentImage):
|
87
90
|
return ContentImage(
|
88
|
-
image=await
|
91
|
+
image=await file_as_data_uri(content.image),
|
89
92
|
detail=content.detail,
|
90
93
|
)
|
94
|
+
elif isinstance(content, ContentAudio):
|
95
|
+
return ContentAudio(
|
96
|
+
audio=await file_as_data_uri(content.audio), format=content.format
|
97
|
+
)
|
98
|
+
elif isinstance(content, ContentVideo):
|
99
|
+
return ContentVideo(
|
100
|
+
video=await file_as_data_uri(content.video), format=content.format
|
101
|
+
)
|
91
102
|
else:
|
92
103
|
return content
|
93
104
|
|
94
105
|
|
95
|
-
def
|
106
|
+
def chat_content_without_base64_content(content: Content) -> Content:
|
96
107
|
if isinstance(content, ContentImage) and is_data_uri(content.image):
|
97
108
|
return ContentImage(image=BASE_64_DATA_REMOVED, detail=content.detail)
|
109
|
+
elif isinstance(content, ContentAudio) and is_data_uri(content.audio):
|
110
|
+
return ContentAudio(audio=BASE_64_DATA_REMOVED, format="mp3")
|
111
|
+
elif isinstance(content, ContentVideo) and is_data_uri(content.video):
|
112
|
+
return ContentVideo(video=BASE_64_DATA_REMOVED, format="mp4")
|
98
113
|
else:
|
99
114
|
return content
|
inspect_ai/_eval/task/results.py
CHANGED
@@ -2,6 +2,7 @@ import fnmatch
|
|
2
2
|
import re
|
3
3
|
from collections import defaultdict
|
4
4
|
from copy import deepcopy
|
5
|
+
from dataclasses import dataclass, field
|
5
6
|
from typing import Any, Tuple, cast
|
6
7
|
|
7
8
|
from inspect_ai._util.registry import (
|
@@ -19,6 +20,8 @@ from inspect_ai.log import (
|
|
19
20
|
from inspect_ai.log._log import EvalSampleReductions
|
20
21
|
from inspect_ai.scorer import Metric, Score, Scorer
|
21
22
|
from inspect_ai.scorer._metric import SampleScore
|
23
|
+
from inspect_ai.scorer._metrics.accuracy import accuracy
|
24
|
+
from inspect_ai.scorer._metrics.std import stderr
|
22
25
|
from inspect_ai.scorer._reducer import ScoreReducer, mean_score, reducer_log_name
|
23
26
|
from inspect_ai.scorer._scorer import (
|
24
27
|
SCORER_METRICS,
|
@@ -27,6 +30,27 @@ from inspect_ai.scorer._scorer import (
|
|
27
30
|
)
|
28
31
|
|
29
32
|
|
33
|
+
@dataclass
|
34
|
+
class ScorerInfo:
|
35
|
+
name: str
|
36
|
+
metrics: list[Metric | dict[str, list[Metric]]] | dict[str, list[Metric]]
|
37
|
+
params: dict[str, Any] = field(default_factory=dict)
|
38
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
39
|
+
|
40
|
+
@staticmethod
|
41
|
+
def from_scorer(scorer: Scorer) -> "ScorerInfo":
|
42
|
+
name = registry_unqualified_name(scorer)
|
43
|
+
metrics = scorer_metrics(scorer)
|
44
|
+
metadata = deepcopy(registry_info(scorer).metadata)
|
45
|
+
del metadata[SCORER_METRICS]
|
46
|
+
params = registry_params(scorer)
|
47
|
+
return ScorerInfo(name=name, metrics=metrics, params=params, metadata=metadata)
|
48
|
+
|
49
|
+
@staticmethod
|
50
|
+
def from_name(name: str) -> "ScorerInfo":
|
51
|
+
return ScorerInfo(name=name, metrics=[accuracy(), stderr()])
|
52
|
+
|
53
|
+
|
30
54
|
def eval_results(
|
31
55
|
samples: int,
|
32
56
|
scores: list[dict[str, SampleScore]],
|
@@ -38,18 +62,23 @@ def eval_results(
|
|
38
62
|
results = EvalResults(total_samples=samples, completed_samples=len(scores))
|
39
63
|
reductions = None
|
40
64
|
|
65
|
+
# extract scorers info from scorers then create scorers info for any
|
66
|
+
# scores not already accounted for by a scorer name
|
67
|
+
scorers_info = [ScorerInfo.from_scorer(scorer) for scorer in (scorers or [])]
|
68
|
+
scorer_names = [info.name for info in scorers_info]
|
69
|
+
for name in set(key for sample_scores in scores for key in sample_scores):
|
70
|
+
if name not in scorer_names:
|
71
|
+
scorers_info.append(ScorerInfo.from_name(name))
|
72
|
+
scorer_names.append(name)
|
73
|
+
|
41
74
|
# record scorer
|
42
|
-
if
|
75
|
+
if len(scorers_info) > 0:
|
43
76
|
result_scores: list[EvalScore] = []
|
44
77
|
sample_reductions: list[EvalSampleReductions] = []
|
45
|
-
for
|
46
|
-
# extract non-metrics metadata
|
47
|
-
metadata = deepcopy(registry_info(scorer).metadata)
|
48
|
-
del metadata[SCORER_METRICS]
|
49
|
-
|
78
|
+
for scorer_info in scorers_info:
|
50
79
|
# this scorer
|
51
80
|
scorer_name = unique_scorer_name(
|
52
|
-
|
81
|
+
scorer_info.name, [eval_score.name for eval_score in result_scores]
|
53
82
|
)
|
54
83
|
|
55
84
|
# scores for this scorer
|
@@ -75,7 +104,7 @@ def eval_results(
|
|
75
104
|
|
76
105
|
# Compute metrics for this scorer
|
77
106
|
simple_scores = cast(list[Score], reduced_scores)
|
78
|
-
targets = metrics if metrics is not None else
|
107
|
+
targets = metrics if metrics is not None else scorer_info.metrics
|
79
108
|
if isinstance(targets, list):
|
80
109
|
## split the metrics into the simple metrics and any dictionary
|
81
110
|
## metrics, to be processed independently
|
@@ -88,8 +117,7 @@ def eval_results(
|
|
88
117
|
result_scores.extend(
|
89
118
|
scorer_for_metrics(
|
90
119
|
scorer_name=scorer_name,
|
91
|
-
|
92
|
-
metadata=metadata,
|
120
|
+
scorer_info=scorer_info,
|
93
121
|
scores=simple_scores,
|
94
122
|
metrics=simple_metrics,
|
95
123
|
reducer_name=reducer_display_nm,
|
@@ -99,8 +127,7 @@ def eval_results(
|
|
99
127
|
result_scores.extend(
|
100
128
|
scorers_from_metric_dict(
|
101
129
|
scorer_name=scorer_name,
|
102
|
-
|
103
|
-
metadata=metadata,
|
130
|
+
scorer_info=scorer_info,
|
104
131
|
scores=simple_scores,
|
105
132
|
metrics=dict_metric,
|
106
133
|
reducer_name=reducer_display_nm,
|
@@ -116,8 +143,7 @@ def eval_results(
|
|
116
143
|
result_scores.extend(
|
117
144
|
scorers_from_metric_dict(
|
118
145
|
scorer_name=scorer_name,
|
119
|
-
|
120
|
-
metadata=metadata,
|
146
|
+
scorer_info=scorer_info,
|
121
147
|
scores=simple_scores,
|
122
148
|
metrics=targets,
|
123
149
|
reducer_name=reducer_display_nm,
|
@@ -156,8 +182,7 @@ def split_metrics(
|
|
156
182
|
|
157
183
|
def scorer_for_metrics(
|
158
184
|
scorer_name: str,
|
159
|
-
|
160
|
-
metadata: dict[str, Any],
|
185
|
+
scorer_info: ScorerInfo,
|
161
186
|
scores: list[Score],
|
162
187
|
metrics: list[Metric],
|
163
188
|
reducer_name: str | None = None,
|
@@ -218,8 +243,10 @@ def scorer_for_metrics(
|
|
218
243
|
scorer=scorer_name,
|
219
244
|
reducer=reducer_name,
|
220
245
|
name=scorer_name,
|
221
|
-
params=
|
222
|
-
metadata=metadata
|
246
|
+
params=scorer_info.params,
|
247
|
+
metadata=scorer_info.metadata
|
248
|
+
if len(scorer_info.metadata.keys()) > 0
|
249
|
+
else None,
|
223
250
|
metrics=list_metrics,
|
224
251
|
)
|
225
252
|
)
|
@@ -228,8 +255,7 @@ def scorer_for_metrics(
|
|
228
255
|
|
229
256
|
def scorers_from_metric_dict(
|
230
257
|
scorer_name: str,
|
231
|
-
|
232
|
-
metadata: dict[str, Any],
|
258
|
+
scorer_info: ScorerInfo,
|
233
259
|
scores: list[Score],
|
234
260
|
metrics: dict[str, list[Metric]],
|
235
261
|
reducer_name: str | None = None,
|
@@ -299,8 +325,10 @@ def scorers_from_metric_dict(
|
|
299
325
|
scorer=scorer_name,
|
300
326
|
reducer=reducer_name,
|
301
327
|
name=metric_key,
|
302
|
-
params=
|
303
|
-
metadata=metadata
|
328
|
+
params=scorer_info.params,
|
329
|
+
metadata=scorer_info.metadata
|
330
|
+
if len(scorer_info.metadata.keys()) > 0
|
331
|
+
else None,
|
304
332
|
metrics=result_metrics,
|
305
333
|
)
|
306
334
|
)
|