inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -0
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +23 -27
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/local_server.py +398 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +173 -159
- inspect_ai/_view/www/dist/assets/index.js +1417 -1142
- inspect_ai/_view/www/log-schema.json +379 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +93 -14
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +22 -12
- inspect_ai/agent/_as_tool.py +20 -6
- inspect_ai/agent/_handoff.py +12 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +16 -3
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +14 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +14 -25
- inspect_ai/log/_transcript.py +84 -36
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +72 -44
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +66 -88
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +247 -0
- inspect_ai/model/_providers/vllm.py +211 -400
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +5 -22
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +8 -5
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +16 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
import atexit
|
2
|
+
from logging import getLogger
|
3
|
+
from subprocess import Popen
|
4
|
+
from typing import Any
|
5
|
+
|
6
|
+
from openai import APIStatusError
|
7
|
+
from typing_extensions import override
|
8
|
+
|
9
|
+
from inspect_ai._util.error import PrerequisiteError, pip_dependency_error
|
10
|
+
from inspect_ai._util.local_server import (
|
11
|
+
configure_devices,
|
12
|
+
merge_env_server_args,
|
13
|
+
start_local_server,
|
14
|
+
terminate_process,
|
15
|
+
)
|
16
|
+
from inspect_ai.model._chat_message import ChatMessage
|
17
|
+
from inspect_ai.model._generate_config import GenerateConfig
|
18
|
+
from inspect_ai.model._model_call import ModelCall
|
19
|
+
from inspect_ai.model._model_output import ModelOutput
|
20
|
+
from inspect_ai.tool._tool_choice import ToolChoice
|
21
|
+
from inspect_ai.tool._tool_info import ToolInfo
|
22
|
+
|
23
|
+
from .openai_compatible import OpenAICompatibleAPI
|
24
|
+
|
25
|
+
# Environment variable names.
# The base-URL and API-key variables are consumed by OpenAICompatibleAPI
# (via the "SGLang" service name), so they are only documented here:
# SGLANG_BASE_URL = "SGLANG_BASE_URL"
# SGLANG_API_KEY = "SGLANG_API_KEY"
# Name of the env var holding a JSON object of default server args that are
# merged (via merge_env_server_args) with the **server_args passed to SGLangAPI.
SGLANG_DEFAULT_SERVER_ARGS = "SGLANG_DEFAULT_SERVER_ARGS"

logger = getLogger(__name__)
|
31
|
+
|
32
|
+
|
33
|
+
class SGLangAPI(OpenAICompatibleAPI):
    """
    Provider for using SGLang models.

    This provider can either:
    1. Connect to an existing SGLang server (if base_url or port is provided)
    2. Start a new SGLang server for the specified model

    Additional server_args:
        timeout (int): Timeout for the server (default: 10 minutes)
        host (str): Host to bind the server to (default: "0.0.0.0")
        device (str): Devices to run the server on. Can be a single device or a
            list of devices as used in CUDA_VISIBLE_DEVICES. If tp is not
            provided, the server will use the number of devices as the tensor
            parallel size.

    Environment variables:
        SGLANG_BASE_URL: Base URL for an existing SGLang server
        SGLANG_API_KEY: API key for the SGLang server
        SGLANG_DEFAULT_SERVER_ARGS: JSON string of default server args, e.g. '{"tp": 4, "max_model_len": 8192}'
    """

    def __init__(
        self,
        model_name: str,
        base_url: str | None = None,
        port: int | None = None,
        api_key: str | None = None,
        config: GenerateConfig = GenerateConfig(),
        **server_args: Any,
    ) -> None:
        """Connect to an existing SGLang server, or launch one for model_name.

        Args:
            model_name: Model to serve (also used as the model path when a
                new server is launched).
            base_url: Base URL of an existing server (mutually exclusive
                with port).
            port: Port of an existing server on localhost (mutually
                exclusive with base_url).
            api_key: API key for the server (a default is generated when a
                new server is started without one).
            config: Default generation config.
            **server_args: Extra args forwarded to the launched server
                (merged with SGLANG_DEFAULT_SERVER_ARGS from the environment).

        Raises:
            ValueError: If both base_url and port are provided.
        """
        # Validate inputs
        if base_url and port:
            raise ValueError("base_url and port cannot both be provided.")
        if port:
            base_url = f"http://localhost:{port}/v1"

        # Initialize server process and port variables
        self.server_process: Popen[str] | None = None
        self.port: int | None = port
        self.server_args = merge_env_server_args(
            SGLANG_DEFAULT_SERVER_ARGS, server_args, logger
        )

        self.server_found = True
        try:
            # Try to initialize with existing server. OpenAICompatibleAPI
            # raises PrerequisiteError when no base URL / API key can be
            # resolved, which we use as the signal to launch our own server.
            super().__init__(
                model_name=model_name,
                base_url=base_url,
                api_key=api_key,
                config=config,
                service="SGLang",
                service_base_url=base_url,
            )
            logger.info(f"Using existing SGLang server at {self.base_url}")
        except PrerequisiteError:
            self.server_found = False

        if not self.server_found:
            logger.warning(
                f"Existing SGLang server not found. Starting new server for {model_name}."
            )

            # Start the server
            base_url, api_key = self._start_server(model_name, api_key=api_key)
            logger.warning(f"SGLang server started at {base_url}")

            # Initialize with new server (second super().__init__ call is
            # intentional: the first attempt failed before completing).
            super().__init__(
                model_name=model_name,
                base_url=base_url,
                api_key=api_key,
                config=config,
                service="SGLang",
                service_base_url=base_url,
            )

    def _start_server(
        self,
        model_path: str,
        api_key: str | None = None,
    ) -> tuple[str, str]:
        """Start a new SGLang server and return the base URL and API key.

        Args:
            model_path: Path to the model to use
            api_key: API key for the server
        Returns:
            tuple[str, str]: The base URL for the server and the API key
        """
        # Verify sglang package is installed since we're starting a server
        try:
            import sglang  # type: ignore # noqa: F401
        except ImportError:
            raise pip_dependency_error("SGLang Server", ["sglang"])

        if not api_key:
            api_key = "inspectai"  # Create a default API key if not provided

        # Handle device configuration (maps device(s) to CUDA_VISIBLE_DEVICES
        # and derives the tensor-parallel size "tp" when not given)
        self.server_args, env_vars = configure_devices(
            self.server_args, parallel_size_param="tp"
        )

        timeout = self.server_args.pop("timeout", None)
        host = self.server_args.pop("host", "0.0.0.0")

        # Create server command as a list instead of a string
        cmd = [
            "python", "-m", "sglang.launch_server",
            "--model-path", model_path,
            "--host", host,
            "--api-key", api_key,
            # while the default backend is supposed to be xgrammar, for some reason leaving this
            # unspecified causes the server to fail when using ebnf grammars
            "--grammar-backend", self.server_args.pop("grammar_backend", "xgrammar"),
        ]  # fmt: skip

        base_url, self.server_process, self.port = start_local_server(
            cmd,
            host=host,
            port=None,  # find a free port
            api_key=api_key,
            server_type="SGLang",
            timeout=timeout,
            server_args=self.server_args,
            env=env_vars,
        )

        # Register cleanup function to run when Python exits
        atexit.register(self._cleanup_server)

        return base_url, api_key

    @property
    def server_is_running(self) -> bool:
        """Check if the server is running."""
        if self.server_process is None:
            return False

        # Check if process is still alive (poll() is None while running)
        return self.server_process.poll() is None

    @override
    def collapse_user_messages(self) -> bool:
        return True

    @override
    def collapse_assistant_messages(self) -> bool:
        return True

    def _cleanup_server(self) -> None:
        """Cleanup method to terminate server process when Python exits."""
        if self.server_is_running and self.server_process is not None:
            logger.info("Cleaning up SGLang server")
            terminate_process(self.server_process)
            self.server_process, self.port = None, None

    async def aclose(self) -> None:
        """Close the client and terminate the server if we started it."""
        # Close the OpenAI client
        await super().aclose()

        self.close()

    def close(self) -> None:
        """
        Terminate the server if we started it.

        Note that this does not close the OpenAI client as we are not in an async context.
        """
        self._cleanup_server()

        # Deregister the atexit handler since we've manually cleaned up
        atexit.unregister(self._cleanup_server)

    async def generate(
        self,
        input: list[ChatMessage],
        tools: list[ToolInfo],
        tool_choice: ToolChoice,
        config: GenerateConfig,
    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
        """Generate a completion, continuing a trailing assistant message if present."""
        # check if last message is an assistant message, in this case we want to
        # continue the final message instead of generating a new one
        # (guard against an empty input list, which would raise IndexError)
        if input and input[-1].role == "assistant":
            # Create a copy of the config to avoid modifying the original
            config = config.model_copy()

            # Set these parameters in extra_body
            if config.extra_body is None:
                config.extra_body = {}

            # Only set these values if they're not already present in extra_body
            if (
                "add_generation_prompt" not in config.extra_body
                and "continue_final_message" not in config.extra_body
            ):
                config.extra_body["add_generation_prompt"] = False
                config.extra_body["continue_final_message"] = True

        return await super().generate(input, tools, tool_choice, config)

    @override
    def handle_bad_request(self, ex: APIStatusError) -> ModelOutput | Exception:
        """Map a 400 context-length error to a model_length stop reason.

        Any other error is returned unchanged for the caller to handle.
        """
        if ex.status_code == 400:
            # Extract message safely
            if isinstance(ex.body, dict) and "message" in ex.body:
                content = str(ex.body.get("message"))
            else:
                content = ex.message

            if "context length" in content:
                return ModelOutput.from_content(
                    self.model_name, content=content, stop_reason="model_length"
                )
        return ex
|