inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_display/textual/widgets/samples.py +3 -3
  3. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  4. inspect_ai/_eval/eval.py +19 -2
  5. inspect_ai/_eval/evalset.py +4 -1
  6. inspect_ai/_eval/run.py +41 -0
  7. inspect_ai/_eval/task/generate.py +38 -44
  8. inspect_ai/_eval/task/log.py +26 -28
  9. inspect_ai/_eval/task/run.py +23 -27
  10. inspect_ai/_util/answer.py +26 -0
  11. inspect_ai/_util/constants.py +0 -1
  12. inspect_ai/_util/local_server.py +398 -0
  13. inspect_ai/_util/working.py +10 -4
  14. inspect_ai/_view/www/dist/assets/index.css +173 -159
  15. inspect_ai/_view/www/dist/assets/index.js +1417 -1142
  16. inspect_ai/_view/www/log-schema.json +379 -3
  17. inspect_ai/_view/www/package.json +1 -1
  18. inspect_ai/_view/www/src/@types/log.d.ts +93 -14
  19. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  20. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  21. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  22. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  23. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  25. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  26. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  27. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  29. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  30. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  31. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  32. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  33. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  35. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  36. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  37. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  39. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  40. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  41. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  42. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  43. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  44. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  45. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  46. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  47. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  48. inspect_ai/_view/www/src/components/Card.css +0 -1
  49. inspect_ai/_view/www/src/constants.ts +2 -0
  50. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  51. inspect_ai/agent/_agent.py +3 -3
  52. inspect_ai/agent/_as_solver.py +22 -12
  53. inspect_ai/agent/_as_tool.py +20 -6
  54. inspect_ai/agent/_handoff.py +12 -1
  55. inspect_ai/agent/_react.py +4 -3
  56. inspect_ai/agent/_run.py +16 -3
  57. inspect_ai/agent/_types.py +9 -0
  58. inspect_ai/dataset/_dataset.py +6 -3
  59. inspect_ai/log/__init__.py +14 -0
  60. inspect_ai/log/_convert.py +4 -9
  61. inspect_ai/log/_file.py +56 -0
  62. inspect_ai/log/_log.py +99 -0
  63. inspect_ai/log/_recorders/__init__.py +2 -0
  64. inspect_ai/log/_recorders/buffer/database.py +12 -11
  65. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  66. inspect_ai/log/_recorders/buffer/types.py +2 -2
  67. inspect_ai/log/_recorders/eval.py +20 -65
  68. inspect_ai/log/_recorders/file.py +28 -6
  69. inspect_ai/log/_recorders/recorder.py +7 -0
  70. inspect_ai/log/_recorders/types.py +1 -23
  71. inspect_ai/log/_samples.py +14 -25
  72. inspect_ai/log/_transcript.py +84 -36
  73. inspect_ai/log/_tree.py +118 -0
  74. inspect_ai/log/_util.py +52 -0
  75. inspect_ai/model/__init__.py +5 -1
  76. inspect_ai/model/_call_tools.py +72 -44
  77. inspect_ai/model/_generate_config.py +14 -8
  78. inspect_ai/model/_model.py +66 -88
  79. inspect_ai/model/_model_output.py +25 -0
  80. inspect_ai/model/_openai.py +2 -0
  81. inspect_ai/model/_providers/anthropic.py +13 -23
  82. inspect_ai/model/_providers/hf.py +27 -1
  83. inspect_ai/model/_providers/openai_o1.py +8 -2
  84. inspect_ai/model/_providers/providers.py +18 -4
  85. inspect_ai/model/_providers/sglang.py +247 -0
  86. inspect_ai/model/_providers/vllm.py +211 -400
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/__init__.py +7 -2
  89. inspect_ai/solver/_basic_agent.py +3 -10
  90. inspect_ai/solver/_chain.py +1 -1
  91. inspect_ai/solver/_fork.py +1 -1
  92. inspect_ai/solver/_multiple_choice.py +5 -22
  93. inspect_ai/solver/_plan.py +2 -2
  94. inspect_ai/solver/_task_state.py +26 -88
  95. inspect_ai/solver/_transcript.py +6 -7
  96. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  97. inspect_ai/tool/_mcp/_mcp.py +8 -5
  98. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  99. inspect_ai/tool/_mcp/server.py +3 -1
  100. inspect_ai/tool/_tool_call.py +4 -1
  101. inspect_ai/tool/_tool_support_helpers.py +51 -12
  102. inspect_ai/tool/_tools/_bash_session.py +190 -68
  103. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  104. inspect_ai/tool/_tools/_execute.py +4 -1
  105. inspect_ai/tool/_tools/_text_editor.py +4 -3
  106. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  107. inspect_ai/util/__init__.py +16 -0
  108. inspect_ai/util/_anyio.py +11 -0
  109. inspect_ai/util/_collect.py +50 -0
  110. inspect_ai/util/_limit.py +393 -0
  111. inspect_ai/util/_limited_conversation.py +57 -0
  112. inspect_ai/util/_span.py +58 -0
  113. inspect_ai/util/_subtask.py +27 -42
  114. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
  117. inspect_ai/_display/core/group.py +0 -79
  118. inspect_ai/solver/_limit.py +0 -39
  119. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  120. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  121. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  122. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  123. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  124. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  125. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  126. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  127. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  128. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  129. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  130. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  131. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  132. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  133. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  134. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  135. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  136. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  137. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  138. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  139. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  140. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  141. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  142. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  143. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  144. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  145. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  146. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  147. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
  148. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
  149. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/sglang.py
@@ -0,0 +1,247 @@
+ import atexit
+ from logging import getLogger
+ from subprocess import Popen
+ from typing import Any
+
+ from openai import APIStatusError
+ from typing_extensions import override
+
+ from inspect_ai._util.error import PrerequisiteError, pip_dependency_error
+ from inspect_ai._util.local_server import (
+     configure_devices,
+     merge_env_server_args,
+     start_local_server,
+     terminate_process,
+ )
+ from inspect_ai.model._chat_message import ChatMessage
+ from inspect_ai.model._generate_config import GenerateConfig
+ from inspect_ai.model._model_call import ModelCall
+ from inspect_ai.model._model_output import ModelOutput
+ from inspect_ai.tool._tool_choice import ToolChoice
+ from inspect_ai.tool._tool_info import ToolInfo
+
+ from .openai_compatible import OpenAICompatibleAPI
+
+ # Environment variable names
+ # SGLANG_BASE_URL = "SGLANG_BASE_URL"
+ # SGLANG_API_KEY = "SGLANG_API_KEY"
+ SGLANG_DEFAULT_SERVER_ARGS = "SGLANG_DEFAULT_SERVER_ARGS"
+
+ logger = getLogger(__name__)
+
+
+ class SGLangAPI(OpenAICompatibleAPI):
+     """
+     Provider for using SGLang models.
+
+     This provider can either:
+     1. Connect to an existing SGLang server (if base_url or port is provided)
+     2. Start a new SGLang server for the specified model
+
+     Additional server_args:
+         timeout (int): Timeout for the server (default: 10 minutes)
+         host (str): Host to bind the server to (default: "0.0.0.0")
+         device (str): Devices to run the server on. Can be a single device or a list of devices as used in CUDA_VISIBLE_DEVICES. If tp is not provided, the server will use the number of devices as the tensor parallel size.
+
+     Environment variables:
+         SGLANG_BASE_URL: Base URL for an existing SGLang server
+         SGLANG_API_KEY: API key for the SGLang server
+         SGLANG_DEFAULT_SERVER_ARGS: JSON string of default server args, e.g. '{"tp": 4, "max_model_len": 8192}'
+     """
+
+     def __init__(
+         self,
+         model_name: str,
+         base_url: str | None = None,
+         port: int | None = None,
+         api_key: str | None = None,
+         config: GenerateConfig = GenerateConfig(),
+         **server_args: Any,
+     ) -> None:
+         # Validate inputs
+         if base_url and port:
+             raise ValueError("base_url and port cannot both be provided.")
+         if port:
+             base_url = f"http://localhost:{port}/v1"
+
+         # Initialize server process and port variables
+         self.server_process: Popen[str] | None = None
+         self.port: int | None = port
+         self.server_args = merge_env_server_args(
+             SGLANG_DEFAULT_SERVER_ARGS, server_args, logger
+         )
+
+         self.server_found = True
+         try:
+             # Try to initialize with existing server
+             super().__init__(
+                 model_name=model_name,
+                 base_url=base_url,
+                 api_key=api_key,
+                 config=config,
+                 service="SGLang",
+                 service_base_url=base_url,
+             )
+             logger.info(f"Using existing SGLang server at {self.base_url}")
+         except PrerequisiteError:
+             self.server_found = False
+
+         if not self.server_found:
+             logger.warning(
+                 f"Existing SGLang server not found. Starting new server for {model_name}."
+             )
+
+             # Start the server
+             base_url, api_key = self._start_server(model_name, api_key=api_key)
+             logger.warning(f"SGLang server started at {base_url}")
+
+             # Initialize with new server
+             super().__init__(
+                 model_name=model_name,
+                 base_url=base_url,
+                 api_key=api_key,
+                 config=config,
+                 service="SGLang",
+                 service_base_url=base_url,
+             )
+
+     def _start_server(
+         self,
+         model_path: str,
+         api_key: str | None = None,
+     ) -> tuple[str, str]:
+         """Start a new SGLang server and return the base URL and API key.
+
+         Args:
+             model_path: Path to the model to use
+             api_key: API key for the server
+         Returns:
+             tuple[str, str]: The base URL for the server and the API key
+         """
+         # Verify sglang package is installed since we're starting a server
+         try:
+             import sglang  # type: ignore # noqa: F401
+         except ImportError:
+             raise pip_dependency_error("SGLang Server", ["sglang"])
+
+         if not api_key:
+             api_key = "inspectai"  # Create a default API key if not provided
+
+         # Handle device configuration
+         self.server_args, env_vars = configure_devices(
+             self.server_args, parallel_size_param="tp"
+         )
+
+         timeout = self.server_args.pop("timeout", None)
+         host = self.server_args.pop("host", "0.0.0.0")
+
+         # Create server command as a list instead of a string
+         cmd = [
+             "python", "-m", "sglang.launch_server",
+             "--model-path", model_path,
+             "--host", host,
+             "--api-key", api_key,
+             # while the default backend is supposed to be xgrammar, for some reason leaving this
+             # unspecified causes the server to fail when using ebnf grammars
+             "--grammar-backend", self.server_args.pop("grammar_backend", "xgrammar"),
+         ]  # fmt: skip
+
+         base_url, self.server_process, self.port = start_local_server(
+             cmd,
+             host=host,
+             port=None,  # find a free port
+             api_key=api_key,
+             server_type="SGLang",
+             timeout=timeout,
+             server_args=self.server_args,
+             env=env_vars,
+         )
+
+         # Register cleanup function to run when Python exits
+         atexit.register(self._cleanup_server)
+
+         return base_url, api_key
+
+     @property
+     def server_is_running(self) -> bool:
+         """Check if the server is running."""
+         if self.server_process is None:
+             return False
+
+         # Check if process is still alive
+         return self.server_process.poll() is None
+
+     @override
+     def collapse_user_messages(self) -> bool:
+         return True
+
+     @override
+     def collapse_assistant_messages(self) -> bool:
+         return True
+
+     def _cleanup_server(self) -> None:
+         """Cleanup method to terminate server process when Python exits."""
+         if self.server_is_running and self.server_process is not None:
+             logger.info("Cleaning up SGLang server")
+             terminate_process(self.server_process)
+             self.server_process, self.port = None, None
+
+     async def aclose(self) -> None:
+         """Close the client and terminate the server if we started it."""
+         # Close the OpenAI client
+         await super().aclose()
+
+         self.close()
+
+     def close(self) -> None:
+         """
+         Terminate the server if we started it.
+
+         Note that this does not close the OpenAI client as we are not in an async context.
+         """
+         self._cleanup_server()
+
+         # Deregister the atexit handler since we've manually cleaned up
+         atexit.unregister(self._cleanup_server)
+
+     async def generate(
+         self,
+         input: list[ChatMessage],
+         tools: list[ToolInfo],
+         tool_choice: ToolChoice,
+         config: GenerateConfig,
+     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
+         # check if last message is an assistant message, in this case we want to
+         # continue the final message instead of generating a new one
+         if input[-1].role == "assistant":
+             # Create a copy of the config to avoid modifying the original
+             config = config.model_copy()
+
+             # Set these parameters in extra_body
+             if config.extra_body is None:
+                 config.extra_body = {}
+
+             # Only set these values if they're not already present in extra_body
+             if (
+                 "add_generation_prompt" not in config.extra_body
+                 and "continue_final_message" not in config.extra_body
+             ):
+                 config.extra_body["add_generation_prompt"] = False
+                 config.extra_body["continue_final_message"] = True
+
+         return await super().generate(input, tools, tool_choice, config)
+
+     @override
+     def handle_bad_request(self, ex: APIStatusError) -> ModelOutput | Exception:
+         if ex.status_code == 400:
+             # Extract message safely
+             if isinstance(ex.body, dict) and "message" in ex.body:
+                 content = str(ex.body.get("message"))
+             else:
+                 content = ex.message
+
+             if "context length" in content:
+                 return ModelOutput.from_content(
+                     self.model_name, content=content, stop_reason="model_length"
+                 )
+         return ex
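
For orientation, a minimal usage sketch for the new provider (not part of the diff), assuming the standard inspect_ai get_model() API and that providers.py registers this class under the "sglang/" model prefix; the model path, port, and generation settings below are illustrative only:

import asyncio

from inspect_ai.model import GenerateConfig, get_model


async def main() -> None:
    # Connect to an already-running SGLang server on a known port; per the
    # provider's __init__, port is expanded to base_url=http://localhost:<port>/v1.
    # Alternatively, set SGLANG_BASE_URL / SGLANG_API_KEY, or pass neither so the
    # provider launches its own server (requires the sglang package installed).
    model = get_model(
        "sglang/Qwen/Qwen2.5-7B-Instruct",  # illustrative model path
        config=GenerateConfig(max_tokens=256),
        port=30000,
        api_key="inspectai",
    )
    output = await model.generate("Say hello in one sentence.")
    print(output.completion)


asyncio.run(main())

Default server arguments (for example tensor-parallel size) can also be supplied process-wide via the SGLANG_DEFAULT_SERVER_ARGS JSON environment variable described in the class docstring.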