inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- inspect_ai/_cli/eval.py +16 -0
- inspect_ai/_cli/score.py +1 -12
- inspect_ai/_cli/util.py +4 -2
- inspect_ai/_display/core/footer.py +2 -2
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_eval/context.py +7 -1
- inspect_ai/_eval/eval.py +51 -27
- inspect_ai/_eval/evalset.py +27 -10
- inspect_ai/_eval/loader.py +7 -8
- inspect_ai/_eval/run.py +23 -31
- inspect_ai/_eval/score.py +18 -1
- inspect_ai/_eval/task/log.py +5 -13
- inspect_ai/_eval/task/resolved.py +1 -0
- inspect_ai/_eval/task/run.py +231 -244
- inspect_ai/_eval/task/task.py +25 -2
- inspect_ai/_eval/task/util.py +1 -8
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/json.py +8 -3
- inspect_ai/_util/registry.py +30 -13
- inspect_ai/_view/www/App.css +5 -0
- inspect_ai/_view/www/dist/assets/index.css +55 -18
- inspect_ai/_view/www/dist/assets/index.js +550 -458
- inspect_ai/_view/www/log-schema.json +84 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
- inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
- inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
- inspect_ai/_view/www/src/types/log.d.ts +150 -129
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_as_tool.py +1 -1
- inspect_ai/agent/_bridge/bridge.py +9 -2
- inspect_ai/agent/_react.py +142 -74
- inspect_ai/agent/_run.py +13 -2
- inspect_ai/agent/_types.py +6 -0
- inspect_ai/approval/_apply.py +6 -9
- inspect_ai/approval/_approver.py +3 -3
- inspect_ai/approval/_auto.py +2 -2
- inspect_ai/approval/_call.py +20 -4
- inspect_ai/approval/_human/approver.py +3 -3
- inspect_ai/approval/_human/manager.py +2 -2
- inspect_ai/approval/_human/panel.py +3 -3
- inspect_ai/approval/_policy.py +3 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +23 -2
- inspect_ai/log/_model.py +58 -0
- inspect_ai/log/_recorders/file.py +14 -3
- inspect_ai/log/_transcript.py +3 -0
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +15 -2
- inspect_ai/model/_model.py +49 -3
- inspect_ai/model/_openai.py +151 -21
- inspect_ai/model/_providers/anthropic.py +25 -14
- inspect_ai/model/_providers/bedrock.py +3 -3
- inspect_ai/model/_providers/cloudflare.py +29 -108
- inspect_ai/model/_providers/google.py +21 -10
- inspect_ai/model/_providers/grok.py +23 -17
- inspect_ai/model/_providers/groq.py +61 -37
- inspect_ai/model/_providers/llama_cpp_python.py +8 -9
- inspect_ai/model/_providers/mistral.py +8 -3
- inspect_ai/model/_providers/ollama.py +8 -9
- inspect_ai/model/_providers/openai.py +53 -157
- inspect_ai/model/_providers/openai_compatible.py +195 -0
- inspect_ai/model/_providers/openrouter.py +4 -15
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/model/_providers/together.py +25 -23
- inspect_ai/model/_trim.py +83 -0
- inspect_ai/solver/_plan.py +5 -3
- inspect_ai/tool/_tool_call.py +3 -0
- inspect_ai/tool/_tool_def.py +8 -2
- inspect_ai/util/__init__.py +3 -0
- inspect_ai/util/_concurrency.py +15 -2
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/RECORD +86 -81
- inspect_ai/_eval/task/rundir.py +0 -78
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/together.py
CHANGED
@@ -10,7 +10,6 @@ from openai.types.chat import (
 from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
-from inspect_ai.model._providers.util.chatapi import ChatAPIHandler
 from inspect_ai.tool._tool_choice import ToolChoice
 from inspect_ai.tool._tool_info import ToolInfo
 
@@ -27,16 +26,14 @@ from .._model_output import (
     as_stop_reason,
 )
 from .._openai import chat_message_assistant_from_openai
-from .openai import (
-    OpenAIAPI,
-)
+from .openai_compatible import OpenAICompatibleAPI
 from .util import (
     chat_api_input,
     chat_api_request,
-    environment_prerequisite_error,
     model_base_url,
     should_retry_chat_api_error,
 )
+from .util.chatapi import ChatAPIHandler
 
 
 def chat_choices_from_response_together(
@@ -78,10 +75,7 @@ def chat_choices_from_response_together(
     ]
 
 
-
-
-
-class TogetherAIAPI(OpenAIAPI):
+class TogetherAIAPI(OpenAICompatibleAPI):
     def __init__(
         self,
         model_name: str,
@@ -89,14 +83,13 @@ class TogetherAIAPI(OpenAIAPI):
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
     ) -> None:
-        if not api_key:
-            api_key = os.environ.get(TOGETHER_API_KEY, None)
-            if not api_key:
-                raise environment_prerequisite_error("TogetherAI", TOGETHER_API_KEY)
-        base_url = model_base_url(base_url, "TOGETHER_BASE_URL")
-        base_url = base_url if base_url else "https://api.together.xyz/v1"
         super().__init__(
-            model_name=model_name,
+            model_name=model_name,
+            base_url=base_url,
+            api_key=api_key,
+            config=config,
+            service="Together",
+            service_base_url="https://api.together.xyz/v1",
         )
 
     # Together uses a default of 512 so we bump it up
@@ -119,22 +112,31 @@ class TogetherAIAPI(OpenAIAPI):
         return ex
 
     @override
-    def
-
-
-        if config.logprobs is True:
+    def completion_params(self, config: GenerateConfig, tools: bool) -> dict[str, Any]:
+        params = super().completion_params(config, tools)
+        if "logprobs" in params:
             params["logprobs"] = 1
+        if "top_logprobs" in params:
+            del params["top_logprobs"]
+
+        # together requires temperature with num_choices
+        if config.num_choices is not None and config.temperature is None:
+            params["temperature"] = 1
+
         return params
 
     # Together has a slightly different logprobs structure to OpenAI, so we need to remap it.
-    def chat_choices_from_response(
-        self, response: ChatCompletion, tools: list[ToolInfo]
+    @override
+    def chat_choices_from_completion(
+        self, completion: ChatCompletion, tools: list[ToolInfo]
     ) -> list[ChatCompletionChoice]:
-        return chat_choices_from_response_together(response, tools)
+        return chat_choices_from_response_together(completion, tools)
 
 
 # Implementation of REST client for Together (currently not used)
+TOGETHER_API_KEY = "TOGETHER_API_KEY"
+
 
 class TogetherRESTAPI(ModelAPI):
     def __init__(
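The rewrite above is the template for this release's provider cleanup: several providers in the file list (cloudflare.py, openai.py, openrouter.py) shed similar key-and-URL plumbing, which now lives in the new openai_compatible.py module (+195 lines). A minimal sketch of the pattern, assuming only the constructor arguments visible in the TogetherAIAPI diff above; the provider name, endpoint URL, and any environment-variable convention are illustrative, not confirmed by this diff:

```python
# Hypothetical provider following the TogetherAIAPI pattern above.
# Assumes OpenAICompatibleAPI resolves a missing api_key/base_url from the
# service name (the removed code read TOGETHER_API_KEY / TOGETHER_BASE_URL,
# so an "Example" service would presumably use EXAMPLE_API_KEY, etc.).
from inspect_ai.model import GenerateConfig
from inspect_ai.model._providers.openai_compatible import OpenAICompatibleAPI


class ExampleAPI(OpenAICompatibleAPI):  # illustrative name
    def __init__(
        self,
        model_name: str,
        base_url: str | None = None,
        api_key: str | None = None,
        config: GenerateConfig = GenerateConfig(),
    ) -> None:
        super().__init__(
            model_name=model_name,
            base_url=base_url,
            api_key=api_key,
            config=config,
            service="Example",  # hypothetical service name
            service_base_url="https://api.example.com/v1",  # hypothetical endpoint
        )
```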
inspect_ai/model/_trim.py
ADDED
@@ -0,0 +1,83 @@
+from dataclasses import dataclass, field
+
+from ._chat_message import ChatMessage
+
+
+def trim_messages(
+    messages: list[ChatMessage], preserve: float = 0.7
+) -> list[ChatMessage]:
+    """Trim message list to fit within model context.
+
+    Trim the list of messages by:
+      - Retaining all system messages.
+      - Retaining the 'input' messages from the sample.
+      - Preserving a proportion of the remaining messages (`preserve=0.7` by default).
+      - Ensuring that all assistant tool calls have corresponding tool messages.
+
+    Args:
+       messages: List of messages to trim.
+       preserve: Ratio of converation messages to preserve
+          (defaults to 0.7)
+
+    Returns:
+       Trimmed messages.
+    """
+    # validate preserve
+    if not 0 <= preserve <= 1:
+        raise ValueError(f"preserve must be in range [0,1], got {preserve}")
+
+    # partition messages
+    partitioned = _partition_messages(messages)
+
+    # slice messages from the beginning of the conversation as-per preserve
+    start_idx = int(len(partitioned.conversation) * (1 - preserve))
+    preserved_messages = partitioned.conversation[start_idx:]
+
+    # one last step: many model apis require tool messages to have a parent assistant
+    # message with a corresponding tool_call_id. to ensure this, we build the
+    # final list of conversation messages by filtering out tool messages for which
+    # we haven't seen a corresponding assistant message with their id
+    conversation_messages: list[ChatMessage] = []
+    active_tool_ids = set()
+    for message in preserved_messages:
+        if message.role == "assistant":
+            active_tool_ids = {tc.id for tc in (message.tool_calls or [])}
+            conversation_messages.append(message)
+        elif message.role == "tool" and message.tool_call_id in active_tool_ids:
+            conversation_messages.append(message)
+        elif message.role == "user":
+            active_tool_ids = set()
+            conversation_messages.append(message)
+
+    # return trimmed messages
+    return partitioned.system + partitioned.input + conversation_messages
+
+
+@dataclass
+class PartitionedMessages:
+    system: list[ChatMessage] = field(default_factory=list)
+    input: list[ChatMessage] = field(default_factory=list)
+    conversation: list[ChatMessage] = field(default_factory=list)
+
+
+def _partition_messages(messages: list[ChatMessage]) -> PartitionedMessages:
+    # first pass at partitioning
+    partitioned = PartitionedMessages()
+    for message in messages:
+        if message.role == "system":
+            partitioned.system.append(message)
+        elif message.source == "input":
+            partitioned.input.append(message)
+        else:
+            partitioned.conversation.append(message)
+
+    # if there are no input messages then take up to the first user message
+    if len(partitioned.input) == 0:
+        while partitioned.conversation:
+            message = partitioned.conversation.pop(0)
+            partitioned.input.append(message)
+            if message.role == "user":
+                break
+
+    # all done!
+    return partitioned
inspect_ai/solver/_plan.py
CHANGED
@@ -164,7 +164,7 @@ def plan(*plan: PlanType | None, name: str | None = None, **attribs: Any) -> Any
         plan_type,
         plan,
         RegistryInfo(
-            type="plan",
+            type="plan",  # type: ignore[arg-type]
             name=plan_name,
             metadata=dict(attribs=attribs, params=params),
         ),
@@ -212,7 +212,9 @@ def plan_register(
     registry_add(
         plan,
         RegistryInfo(
-            type="plan",
+            type="plan",  # type: ignore[arg-type]
+            name=name,
+            metadata=dict(attribs=attribs, params=params),
         ),
     )
     return plan
@@ -228,4 +230,4 @@ def plan_create(name: str, **kwargs: Any) -> Plan:
     Returns:
        Plan with registry info attribute
     """
-    return cast(Plan, registry_create("plan", name, **kwargs))
+    return cast(Plan, registry_create("plan", name, **kwargs))  # type: ignore[arg-type]
inspect_ai/tool/_tool_call.py
CHANGED
inspect_ai/tool/_tool_def.py
CHANGED
@@ -234,9 +234,15 @@ def validate_tool_parameters(tool_name: str, parameters: dict[str, ToolParam]) -
     # validate that we have types/descriptions for paramters
     for param_name, param in parameters.items():
 
-        def raise_not_provided_error(context: str) -> None:
+        def raise_not_provided_error(
+            context: str,
+            # Use the default value trick to avoid Python's late binding of
+            # closures issue.
+            # see: https://docs.python.org/3/faq/programming.html#why-do-lambdas-defined-in-a-loop-with-different-values-all-return-the-same-result
+            bound_name: str = param_name,
+        ) -> None:
             raise ValueError(
-                f"{context} provided for parameter '{param_name}' of function '{tool_name}'."
+                f"{context} provided for parameter '{bound_name}' of function '{tool_name}'."
             )
 
         if param.type is None and not param.anyOf and not param.enum:
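The `bound_name: str = param_name` default introduced above is the standard workaround for the late-binding behavior the new comment links to: a closure defined in a loop captures the loop variable itself, not its value at definition time. A self-contained illustration (plain Python, not inspect_ai code):

```python
# Closures capture variables, not values: every lambda below sees the
# final value of i once the loop has finished.
late = [lambda: i for i in range(3)]
print([f() for f in late])  # [2, 2, 2]

# A default argument is evaluated at definition time, binding the current
# value of i. This is the same trick raise_not_provided_error uses with
# bound_name=param_name.
early = [lambda bound_i=i: bound_i for i in range(3)]
print([f() for f in early])  # [0, 1, 2]
```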
inspect_ai/util/__init__.py
CHANGED
@@ -1,3 +1,4 @@
+from inspect_ai._util.registry import RegistryType, registry_create
 from inspect_ai._util.trace import trace_action, trace_message
 
 from ._concurrency import concurrency
@@ -64,4 +65,6 @@
     "throttle",
     "trace_action",
     "trace_message",
+    "RegistryType",
+    "registry_create",
 ]
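This makes `registry_create` (the helper that `plan_create` wraps in the `_plan.py` hunk above) part of the public `inspect_ai.util` surface. A hedged sketch of a call; the `"solver"` registry type and `"my_solver"` name are assumptions for illustration (note that `"plan"` now requires `type: ignore[arg-type]` above, suggesting it is no longer a member of the `RegistryType` literal):

```python
from inspect_ai.util import RegistryType, registry_create

# instantiate a registered object by type and name, mirroring the
# registry_create("plan", name, **kwargs) call in plan_create above
kind: RegistryType = "solver"  # assumed to be a valid RegistryType member
solver = registry_create(kind, "my_solver")  # "my_solver" is a hypothetical registered solver
```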
inspect_ai/util/_concurrency.py
CHANGED
@@ -56,10 +56,23 @@ async def concurrency(
         yield
 
 
-def
+def concurrency_status_display() -> dict[str, tuple[int, int]]:
     status: dict[str, tuple[int, int]] = {}
+    names = [c.name for c in _concurrency_semaphores.values()]
     for c in _concurrency_semaphores.values():
-
+        # compute name for status display. some resources (e.g. models) use
+        # a / prefix. if there are no duplicates of a given prefix then shorten
+        # it to be only the prefix (e.g. 'openai' rather than 'openai/gpt-4o')
+        prefix = c.name.split("/")[0]
+        prefix_count = sum([1 for name in names if name.startswith(prefix + "/")])
+        if prefix_count == 1:
+            name = prefix
+        else:
+            name = c.name
+
+        # status display entry
+        status[name] = (c.concurrency - c.semaphore.value, c.concurrency)
+
     return status
 
 
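The name-shortening rule above can be tried in isolation: a `/`-prefixed resource name collapses to its prefix only when no other resource shares that prefix. A standalone rendition (plain Python, not the inspect_ai internals):

```python
# Standalone copy of the display-name logic from concurrency_status_display.
def display_names(names: list[str]) -> list[str]:
    shortened = []
    for name in names:
        prefix = name.split("/")[0]
        # count resources sharing this '/' prefix
        prefix_count = sum(1 for n in names if n.startswith(prefix + "/"))
        shortened.append(prefix if prefix_count == 1 else name)
    return shortened

print(display_names(["openai/gpt-4o", "subprocesses"]))
# ['openai', 'subprocesses']
print(display_names(["openai/gpt-4o", "openai/gpt-4o-mini"]))
# ['openai/gpt-4o', 'openai/gpt-4o-mini']
```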