inspect-ai 0.3.92__py3-none-any.whl → 0.3.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117):
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_eval/eval.py +19 -2
  3. inspect_ai/_eval/evalset.py +4 -1
  4. inspect_ai/_eval/run.py +41 -0
  5. inspect_ai/_eval/task/generate.py +38 -44
  6. inspect_ai/_eval/task/log.py +26 -28
  7. inspect_ai/_eval/task/run.py +13 -20
  8. inspect_ai/_util/local_server.py +368 -0
  9. inspect_ai/_util/working.py +10 -4
  10. inspect_ai/_view/www/dist/assets/index.css +159 -146
  11. inspect_ai/_view/www/dist/assets/index.js +1020 -1061
  12. inspect_ai/_view/www/log-schema.json +4 -3
  13. inspect_ai/_view/www/package.json +1 -1
  14. inspect_ai/_view/www/src/@types/log.d.ts +3 -2
  15. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  16. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  17. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  18. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  19. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  20. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  21. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  22. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  23. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  24. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  25. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  26. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  27. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  28. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  29. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  30. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  31. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  32. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  33. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  34. inspect_ai/_view/www/src/components/Card.css +0 -1
  35. inspect_ai/_view/www/src/constants.ts +2 -0
  36. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  37. inspect_ai/agent/_agent.py +3 -3
  38. inspect_ai/agent/_as_solver.py +20 -12
  39. inspect_ai/agent/_as_tool.py +15 -3
  40. inspect_ai/agent/_handoff.py +8 -1
  41. inspect_ai/agent/_run.py +11 -3
  42. inspect_ai/log/__init__.py +4 -0
  43. inspect_ai/log/_file.py +56 -0
  44. inspect_ai/log/_log.py +99 -0
  45. inspect_ai/log/_recorders/__init__.py +2 -0
  46. inspect_ai/log/_recorders/buffer/database.py +12 -11
  47. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  48. inspect_ai/log/_recorders/buffer/types.py +2 -2
  49. inspect_ai/log/_recorders/eval.py +20 -65
  50. inspect_ai/log/_recorders/file.py +28 -6
  51. inspect_ai/log/_recorders/recorder.py +7 -0
  52. inspect_ai/log/_recorders/types.py +1 -23
  53. inspect_ai/log/_samples.py +0 -8
  54. inspect_ai/log/_transcript.py +7 -1
  55. inspect_ai/log/_util.py +52 -0
  56. inspect_ai/model/__init__.py +5 -1
  57. inspect_ai/model/_call_tools.py +32 -12
  58. inspect_ai/model/_generate_config.py +14 -8
  59. inspect_ai/model/_model.py +21 -48
  60. inspect_ai/model/_model_output.py +25 -0
  61. inspect_ai/model/_openai.py +2 -0
  62. inspect_ai/model/_providers/anthropic.py +13 -23
  63. inspect_ai/model/_providers/openai_o1.py +8 -2
  64. inspect_ai/model/_providers/providers.py +18 -4
  65. inspect_ai/model/_providers/sglang.py +241 -0
  66. inspect_ai/model/_providers/vllm.py +207 -400
  67. inspect_ai/solver/__init__.py +7 -2
  68. inspect_ai/solver/_basic_agent.py +3 -10
  69. inspect_ai/solver/_task_state.py +26 -88
  70. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  71. inspect_ai/tool/_mcp/_mcp.py +2 -0
  72. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  73. inspect_ai/tool/_mcp/server.py +3 -1
  74. inspect_ai/tool/_tool_call.py +4 -1
  75. inspect_ai/tool/_tool_support_helpers.py +51 -12
  76. inspect_ai/tool/_tools/_bash_session.py +190 -68
  77. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  78. inspect_ai/tool/_tools/_text_editor.py +4 -3
  79. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  80. inspect_ai/util/__init__.py +12 -0
  81. inspect_ai/util/_limit.py +393 -0
  82. inspect_ai/util/_limited_conversation.py +57 -0
  83. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/METADATA +1 -1
  84. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/RECORD +89 -108
  85. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/WHEEL +1 -1
  86. inspect_ai/solver/_limit.py +0 -39
  87. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  88. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  89. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  90. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  91. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  92. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  93. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  94. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  95. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  96. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  97. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  98. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  99. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  100. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  101. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  102. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  103. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  104. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  105. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  106. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  107. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  108. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  109. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  110. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  111. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  112. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  113. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  114. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/entry_points.txt +0 -0
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/licenses/LICENSE +0 -0
  117. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,241 @@
1
+ import atexit
2
+ from logging import getLogger
3
+ from subprocess import Popen
4
+ from typing import Any
5
+
6
+ from openai import APIStatusError
7
+ from typing_extensions import override
8
+
9
+ from inspect_ai._util.error import PrerequisiteError, pip_dependency_error
10
+ from inspect_ai._util.local_server import (
11
+ configure_devices,
12
+ merge_env_server_args,
13
+ start_local_server,
14
+ terminate_process,
15
+ )
16
+ from inspect_ai.model._chat_message import ChatMessage
17
+ from inspect_ai.model._generate_config import GenerateConfig
18
+ from inspect_ai.model._model_call import ModelCall
19
+ from inspect_ai.model._model_output import ModelOutput
20
+ from inspect_ai.tool._tool_choice import ToolChoice
21
+ from inspect_ai.tool._tool_info import ToolInfo
22
+
23
+ from .openai_compatible import OpenAICompatibleAPI
24
+
25
# Environment variable names.
#
# NOTE(review): SGLANG_BASE_URL and SGLANG_API_KEY are documented in the
# SGLangAPI class docstring but are not referenced by name in this module —
# presumably they are resolved by OpenAICompatibleAPI from the service name
# ("SGLang") passed to super().__init__; confirm against openai_compatible.py.
#
# SGLANG_DEFAULT_SERVER_ARGS holds a JSON object of default server arguments
# (e.g. '{"tp": 4}') that merge_env_server_args combines with the args passed
# to the provider constructor.
SGLANG_DEFAULT_SERVER_ARGS = "SGLANG_DEFAULT_SERVER_ARGS"

logger = getLogger(__name__)
31
+
32
+
33
class SGLangAPI(OpenAICompatibleAPI):
    """
    Provider for using SGLang models.

    This provider can either:
    1. Connect to an existing SGLang server (if base_url or port is provided,
       or discoverable by OpenAICompatibleAPI from the environment)
    2. Start a new SGLang server for the specified model

    Additional server_args:
        timeout (int): Timeout for the server (default: 10 minutes)
        host (str): Host to bind the server to (default: "0.0.0.0")
        device (str): Devices to run the server on. Can be a single device or
            a list of devices as used in CUDA_VISIBLE_DEVICES. If tp is not
            provided, the server will use the number of devices as the tensor
            parallel size.

    Environment variables:
        SGLANG_BASE_URL: Base URL for an existing SGLang server
        SGLANG_API_KEY: API key for the SGLang server
        SGLANG_DEFAULT_SERVER_ARGS: JSON string of default server args,
            e.g. '{"tp": 4, "max_model_len": 8192}'
    """

    def __init__(
        self,
        model_name: str,
        base_url: str | None = None,
        port: int | None = None,
        api_key: str | None = None,
        config: GenerateConfig = GenerateConfig(),
        **server_args: Any,
    ) -> None:
        """Connect to (or launch) an SGLang server for `model_name`.

        Args:
            model_name: Model to serve (also used as the model path when a
                new server is launched).
            base_url: Base URL of an existing SGLang server. Mutually
                exclusive with `port`.
            port: Port of an existing server on localhost; expanded to
                `http://localhost:{port}/v1`.
            api_key: API key for the server (a default is created when a new
                server is started without one).
            config: Default generation config.
            **server_args: Extra arguments forwarded to the launched server
                (see class docstring).
        """
        # Validate inputs: base_url and port are two ways of naming the same
        # existing server, so accepting both would be ambiguous.
        if base_url and port:
            raise ValueError("base_url and port cannot both be provided.")
        if port:
            base_url = f"http://localhost:{port}/v1"

        # Initialize server process and port variables (remain None unless we
        # launch our own server below).
        self.server_process: Popen[str] | None = None
        self.port: int | None = port
        # Merge env-provided defaults (SGLANG_DEFAULT_SERVER_ARGS) with
        # explicit keyword server args; explicit args presumably win —
        # TODO confirm against merge_env_server_args.
        self.server_args = merge_env_server_args(
            SGLANG_DEFAULT_SERVER_ARGS, server_args, logger
        )

        try:
            # Try to initialize with existing server. OpenAICompatibleAPI
            # raises PrerequisiteError when it cannot resolve a server
            # (no base_url and no environment configuration).
            super().__init__(
                model_name=model_name,
                base_url=base_url,
                api_key=api_key,
                config=config,
                service="SGLang",
                service_base_url=base_url,
            )
            logger.info(f"Using existing SGLang server at {self.base_url}")
        except PrerequisiteError:
            # No existing server found, start a new one.
            logger.warning(
                f"Existing SGLang server not found. Starting new server for {model_name}."
            )

            # Start the server (blocks until it is reachable or times out).
            base_url, api_key = self._start_server(model_name, api_key=api_key)
            logger.warning(f"SGLang server started at {base_url}")

            # Initialize with new server.
            super().__init__(
                model_name=model_name,
                base_url=base_url,
                api_key=api_key,
                config=config,
                service="SGLang",
                service_base_url=base_url,
            )

    def _start_server(
        self,
        model_path: str,
        api_key: str | None = None,
    ) -> tuple[str, str]:
        """Start a new SGLang server and return the base URL and API key.

        Args:
            model_path: Path to the model to use
            api_key: API key for the server
        Returns:
            tuple[str, str]: The base URL for the server and the API key

        Side effects: sets self.server_process / self.port, mutates
        self.server_args (pops "timeout", "host", "grammar_backend"), and
        registers an atexit cleanup handler.
        """
        # Verify sglang package is installed since we're starting a server
        # (connecting to an existing server does not require it).
        try:
            import sglang  # type: ignore  # noqa: F401
        except ImportError:
            raise pip_dependency_error("SGLang Server", ["sglang"])

        if not api_key:
            api_key = "inspectai"  # Create a default API key if not provided

        # Handle device configuration: translates a "device" server arg into
        # CUDA device visibility and a tensor-parallel size ("tp") —
        # see configure_devices in _util/local_server.py.
        self.server_args = configure_devices(self.server_args, parallel_size_param="tp")

        timeout = self.server_args.pop("timeout", None)
        host = self.server_args.pop("host", "0.0.0.0")

        # Create server command as a list instead of a string (no shell).
        cmd = [
            "python", "-m", "sglang.launch_server",
            "--model-path", model_path,
            "--host", host,
            "--api-key", api_key,
            # while the default backend is supposed to be xgrammar, for some reason leaving this
            # unspecified causes the server to fail when using ebnf grammars
            "--grammar-backend", self.server_args.pop("grammar_backend", "xgrammar"),
        ]  # fmt: skip

        # Remaining self.server_args are appended to the command by
        # start_local_server; port=None asks it to pick a free port.
        base_url, self.server_process, self.port = start_local_server(
            cmd,
            host=host,
            port=None,  # find a free port
            api_key=api_key,
            server_type="SGLang",
            timeout=timeout,
            server_args=self.server_args,
        )

        # Register cleanup function to run when Python exits so the server
        # subprocess is not orphaned.
        atexit.register(self._cleanup_server)

        return base_url, api_key

    @property
    def server_is_running(self) -> bool:
        """Check if the server is running."""
        # No process means we never started one (or already cleaned up).
        if self.server_process is None:
            return False

        # Check if process is still alive (poll() returns None while running).
        return self.server_process.poll() is None

    @override
    def collapse_user_messages(self) -> bool:
        # Merge consecutive user messages into one before sending.
        return True

    @override
    def collapse_assistant_messages(self) -> bool:
        # Merge consecutive assistant messages into one before sending.
        return True

    def _cleanup_server(self) -> None:
        """Cleanup method to terminate server process when Python exits."""
        # Idempotent: a second call sees server_process is None and does nothing.
        if self.server_is_running and self.server_process is not None:
            logger.info("Cleaning up SGLang server")
            terminate_process(self.server_process)
            self.server_process, self.port = None, None

    async def aclose(self) -> None:
        """Close the client and terminate the server if we started it."""
        # Close the OpenAI client
        await super().aclose()

        self.close()

    def close(self) -> None:
        """
        Terminate the server if we started it.

        Note that this does not close the OpenAI client as we are not in an async context.
        """
        self._cleanup_server()

        # Deregister the atexit handler since we've manually cleaned up
        atexit.unregister(self._cleanup_server)

    async def generate(
        self,
        input: list[ChatMessage],
        tools: list[ToolInfo],
        tool_choice: ToolChoice,
        config: GenerateConfig,
    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
        """Generate, continuing the final assistant message when present.

        If the conversation ends with an assistant message, ask the server to
        continue that message rather than start a new turn (via the
        add_generation_prompt / continue_final_message extra_body options).
        """
        # check if last message is an assistant message, in this case we want to
        # continue the final message instead of generating a new one
        if input[-1].role == "assistant":
            # Create a copy of the config to avoid modifying the original
            config = config.model_copy()

            # Set these parameters in extra_body
            if config.extra_body is None:
                config.extra_body = {}

            # Only set these values if they're not already present in
            # extra_body — an explicit caller setting of either key wins.
            if (
                "add_generation_prompt" not in config.extra_body
                and "continue_final_message" not in config.extra_body
            ):
                config.extra_body["add_generation_prompt"] = False
                config.extra_body["continue_final_message"] = True

        return await super().generate(input, tools, tool_choice, config)

    @override
    def handle_bad_request(self, ex: APIStatusError) -> ModelOutput | Exception:
        """Map context-length 400 errors to a model_length stop.

        Any other error (including 400s that do not mention context length)
        is returned unchanged for the caller to raise/record.
        """
        if ex.status_code == 400:
            # Extract message safely: the body may not be a dict.
            if isinstance(ex.body, dict) and "message" in ex.body:
                content = str(ex.body.get("message"))
            else:
                content = ex.message

            # SGLang reports prompt-too-long errors with "context length" in
            # the message text — surface those as a model_length stop reason.
            if "context length" in content:
                return ModelOutput.from_content(
                    self.model_name, content=content, stop_reason="model_length"
                )
        return ex