inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/task.py CHANGED
@@ -1,4 +1,3 @@
1
- from copy import deepcopy
2
1
  from dataclasses import dataclass
3
2
  from logging import getLogger
4
3
  from typing import Any, Awaitable, Callable, Sequence, cast
@@ -9,6 +8,8 @@ from typing_extensions import TypedDict, Unpack
9
8
  from inspect_ai._util.logger import warn_once
10
9
  from inspect_ai._util.notgiven import NOT_GIVEN, NotGiven
11
10
  from inspect_ai._util.registry import is_registry_object, registry_info
11
+ from inspect_ai.agent._agent import Agent, is_agent
12
+ from inspect_ai.agent._as_solver import as_solver
12
13
  from inspect_ai.approval._policy import ApprovalPolicy, approval_policies_from_config
13
14
  from inspect_ai.dataset import Dataset, MemoryDataset, Sample
14
15
  from inspect_ai.log import EvalLog
@@ -47,7 +48,7 @@ class Task:
47
48
  self,
48
49
  dataset: Dataset | Sequence[Sample] | None = None,
49
50
  setup: Solver | list[Solver] | None = None,
50
- solver: Solver | list[Solver] = generate(),
51
+ solver: Solver | Agent | list[Solver] = generate(),
51
52
  cleanup: Callable[[TaskState], Awaitable[None]] | None = None,
52
53
  scorer: Scorer | list[Scorer] | None = None,
53
54
  metrics: list[Metric] | dict[str, list[Metric]] | None = None,
@@ -158,6 +159,13 @@ class Task:
158
159
  else:
159
160
  return "task"
160
161
 
162
+ @property
163
+ def registry_name(self) -> str | None:
164
+ if is_registry_object(self):
165
+ return registry_info(self).name
166
+ else:
167
+ return None
168
+
161
169
  @property
162
170
  def attribs(self) -> dict[str, Any]:
163
171
  if is_registry_object(self):
@@ -191,8 +199,12 @@ def task_with(
191
199
  ) -> Task:
192
200
  """Task adapted with alternate values for one or more options.
193
201
 
202
+ This function modifies the passed task in place and returns it.
203
+ If you want to create multiple variations of a single task using
204
+ `task_with()` you should create the underlying task multiple times.
205
+
194
206
  Args:
195
- task: Task to adapt (it is deep copied prior to mutating options)
207
+ task: Task to adapt
196
208
  dataset: Dataset to evaluate
197
209
  setup: Setup step (always run even when the main `solver` is replaced).
198
210
  solver: Solver or list of solvers. Defaults to generate(), a normal call to the model.
@@ -227,11 +239,8 @@ def task_with(
227
239
  metadata: Additional metadata to associate with the task.
228
240
 
229
241
  Returns:
230
- Task: Task adapted with alternate options.
242
+ Task: Passed `task` with modifications.
231
243
  """
232
- # deep copy the task
233
- task = deepcopy(task)
234
-
235
244
  if not isinstance(dataset, NotGiven):
236
245
  task.dataset = resolve_dataset(dataset)
237
246
  if not isinstance(setup, NotGiven):
@@ -340,8 +349,13 @@ def resolve_dataset(dataset: Dataset | Sequence[Sample] | None) -> Dataset:
340
349
  return dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
341
350
 
342
351
 
343
- def resolve_solver(solver: Solver | list[Solver]) -> Solver:
344
- return chain(solver) if isinstance(solver, list) else solver
352
+ def resolve_solver(solver: Solver | Agent | list[Solver]) -> Solver:
353
+ if isinstance(solver, list):
354
+ return chain(solver)
355
+ elif is_agent(solver):
356
+ return as_solver(solver)
357
+ else:
358
+ return cast(Solver, solver)
345
359
 
346
360
 
347
361
  def resolve_model(model: str | Model | None) -> Model | None:
inspect_ai/_util/file.py CHANGED
@@ -322,6 +322,19 @@ def absolute_file_path(file: str) -> str:
322
322
  return file
323
323
 
324
324
 
325
+ def to_uri(path_or_uri: str) -> str:
326
+ # Check if it's already a URI
327
+ parsed = urlparse(path_or_uri)
328
+
329
+ if parsed.scheme:
330
+ # Already has a scheme, return as is
331
+ return path_or_uri
332
+
333
+ # It's a file path, convert to URI
334
+ path_obj = Path(path_or_uri).absolute()
335
+ return path_obj.as_uri()
336
+
337
+
325
338
  def default_fs_options(file: str) -> dict[str, Any]:
326
339
  scheme = urlparse(file).scheme
327
340
  if (
inspect_ai/_util/json.py CHANGED
@@ -8,7 +8,8 @@ import jsonpatch
8
8
  from pydantic import BaseModel, Field, JsonValue
9
9
  from pydantic_core import to_json, to_jsonable_python
10
10
 
11
- from inspect_ai.util._json import JSONType
11
+ JSONType = Literal["string", "integer", "number", "boolean", "array", "object", "null"]
12
+ """Valid types within JSON schema."""
12
13
 
13
14
 
14
15
  def jsonable_python(x: Any) -> Any:
inspect_ai/_util/registry.py CHANGED
@@ -21,6 +21,7 @@ RegistryType = Literal[
21
21
  "scorer",
22
22
  "metric",
23
23
  "tool",
24
+ "agent",
24
25
  "sandboxenv",
25
26
  "score_reducer",
26
27
  "approver",
inspect_ai/_util/vscode.py CHANGED
@@ -1,13 +1,19 @@
1
1
  import os
2
+ from logging import getLogger
2
3
  from pathlib import Path
3
4
  from typing import Any
4
5
 
5
6
  from pydantic import BaseModel, Field
6
7
  from pydantic_core import to_json
8
+ from semver import Version
7
9
  from shortuuid import uuid
8
10
 
9
11
  from .appdirs import inspect_data_dir
10
12
 
13
+ logger = getLogger(__name__)
14
+
15
+ EXTENSION_COMMAND_VERSIONS = {"inspect.openLogViewer": Version(0, 3, 61)}
16
+
11
17
 
12
18
  class VSCodeCommand(BaseModel):
13
19
  command: str
@@ -34,6 +40,25 @@ def can_execute_vscode_commands() -> bool:
34
40
  return vs_code_commands_dir() is not None
35
41
 
36
42
 
43
+ def can_execute_vscode_command(command: str) -> bool:
44
+ if not can_execute_vscode_commands():
45
+ return False
46
+
47
+ required_version = EXTENSION_COMMAND_VERSIONS.get(command)
48
+ if required_version is None:
49
+ return True
50
+ else:
51
+ return has_vscode_version(required_version)
52
+
53
+
54
+ def has_vscode_version(required_version: Version) -> bool:
55
+ current_version = vscode_extension_version()
56
+ if current_version is None:
57
+ return False
58
+ else:
59
+ return current_version.is_compatible(required_version)
60
+
61
+
37
62
  def vs_code_commands_dir() -> Path | None:
38
63
  workspace_id = vscode_workspace_id()
39
64
  if workspace_id:
@@ -49,3 +74,15 @@ def vs_code_commands_dir() -> Path | None:
49
74
 
50
75
  def vscode_workspace_id() -> str | None:
51
76
  return os.environ.get("INSPECT_WORKSPACE_ID", None)
77
+
78
+
79
+ def vscode_extension_version() -> Version | None:
80
+ version = os.environ.get("INSPECT_VSCODE_EXT_VERSION", None)
81
+ if version is not None:
82
+ try:
83
+ return Version.parse(version)
84
+ except Exception:
85
+ logger.warning(f"Invalid Inspect vscode extension version: {version}")
86
+ return None
87
+ else:
88
+ return None
inspect_ai/_view/www/App.css CHANGED
@@ -31,6 +31,10 @@
31
31
  --inspect-font-size-base: 0.9rem;
32
32
  --inspect-font-size-small: 0.8rem;
33
33
  --inspect-font-size-smaller: 0.8rem;
34
+
35
+ /* Inspect Glass */
36
+ --inspect-glass-color: #000000;
37
+ --inspect-glass-opacity: 0.3;
34
38
  }
35
39
 
36
40
  body:not([class^="vscode-"]) button {
@@ -154,6 +158,8 @@ body[class^="vscode-"] {
154
158
  --inspect-input-border: var(--vscode-input-border);
155
159
  --inspect-diff-add-color: var(--vscode-diffEditor-insertedTextBackground);
156
160
  --inspect-diff-remove-color: var(--vscode-diffEditor-removedTextBackground);
161
+ --inspect-glass-color: var(--vscode-editor-foreground);
162
+ --inspect-glass-opacity: 0.15;
157
163
  }
158
164
 
159
165
  html.vscode {