inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/textual/app.py +3 -2
  3. inspect_ai/_display/textual/widgets/samples.py +4 -10
  4. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  5. inspect_ai/_eval/eval.py +14 -2
  6. inspect_ai/_eval/evalset.py +6 -1
  7. inspect_ai/_eval/run.py +6 -0
  8. inspect_ai/_eval/task/run.py +44 -15
  9. inspect_ai/_eval/task/task.py +26 -3
  10. inspect_ai/_util/interrupt.py +6 -0
  11. inspect_ai/_util/logger.py +19 -0
  12. inspect_ai/_util/rich.py +7 -8
  13. inspect_ai/_util/text.py +13 -0
  14. inspect_ai/_util/transcript.py +10 -2
  15. inspect_ai/_util/working.py +46 -0
  16. inspect_ai/_view/www/dist/assets/index.css +56 -12
  17. inspect_ai/_view/www/dist/assets/index.js +904 -750
  18. inspect_ai/_view/www/log-schema.json +337 -2
  19. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  20. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  21. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  22. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  23. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  25. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  27. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  28. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  29. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  30. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  31. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  32. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  33. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  34. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  35. inspect_ai/_view/www/src/utils/format.ts +7 -4
  36. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  37. inspect_ai/log/__init__.py +2 -0
  38. inspect_ai/log/_condense.py +1 -0
  39. inspect_ai/log/_log.py +72 -12
  40. inspect_ai/log/_samples.py +5 -1
  41. inspect_ai/log/_transcript.py +31 -1
  42. inspect_ai/model/_call_tools.py +1 -1
  43. inspect_ai/model/_conversation.py +1 -1
  44. inspect_ai/model/_model.py +32 -16
  45. inspect_ai/model/_model_call.py +10 -3
  46. inspect_ai/model/_providers/anthropic.py +13 -2
  47. inspect_ai/model/_providers/bedrock.py +7 -0
  48. inspect_ai/model/_providers/cloudflare.py +20 -7
  49. inspect_ai/model/_providers/google.py +2 -0
  50. inspect_ai/model/_providers/groq.py +57 -23
  51. inspect_ai/model/_providers/hf.py +6 -0
  52. inspect_ai/model/_providers/mistral.py +78 -51
  53. inspect_ai/model/_providers/openai.py +9 -0
  54. inspect_ai/model/_providers/providers.py +1 -1
  55. inspect_ai/model/_providers/util/tracker.py +92 -0
  56. inspect_ai/model/_providers/vllm.py +13 -5
  57. inspect_ai/solver/_basic_agent.py +1 -3
  58. inspect_ai/solver/_bridge/patch.py +0 -2
  59. inspect_ai/solver/_limit.py +4 -4
  60. inspect_ai/solver/_plan.py +0 -3
  61. inspect_ai/solver/_task_state.py +7 -0
  62. inspect_ai/tool/_tools/_web_search.py +3 -3
  63. inspect_ai/util/_concurrency.py +14 -8
  64. inspect_ai/util/_sandbox/context.py +15 -0
  65. inspect_ai/util/_sandbox/docker/docker.py +7 -5
  66. inspect_ai/util/_sandbox/environment.py +32 -1
  67. inspect_ai/util/_sandbox/events.py +149 -0
  68. inspect_ai/util/_sandbox/local.py +3 -3
  69. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +3 -3
  70. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +74 -67
  71. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  72. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  73. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  import os
3
- from typing import Literal, Protocol, cast, runtime_checkable
3
+ from typing import Literal, Protocol, runtime_checkable
4
4
 
5
5
  import httpx
6
6
  from bs4 import BeautifulSoup, NavigableString
@@ -90,8 +90,8 @@ def web_search(
90
90
  return_exceptions=True,
91
91
  )
92
92
  for page, link in zip(pages, links):
93
- if page and not isinstance(page, Exception):
94
- page_contents.append(cast(str, page))
93
+ if page and not isinstance(page, BaseException):
94
+ page_contents.append(page)
95
95
  urls.append(link.url)
96
96
  snippets.append(link.snippet)
97
97
  search_calls += 1
@@ -1,13 +1,19 @@
1
1
  import asyncio
2
+ import contextlib
3
+ import time
2
4
  from dataclasses import dataclass
5
+ from typing import AsyncIterator
3
6
 
7
+ from inspect_ai._util.working import report_sample_waiting_time
4
8
 
5
- def concurrency(
9
+
10
+ @contextlib.asynccontextmanager
11
+ async def concurrency(
6
12
  name: str,
7
13
  concurrency: int,
8
14
  key: str | None = None,
9
- ) -> asyncio.Semaphore:
10
- """Obtain a concurrency context.
15
+ ) -> AsyncIterator[None]:
16
+ """Concurrency context manager.
11
17
 
12
18
  A concurrency context can be used to limit the number of coroutines
13
19
  executing a block of code (e.g calling an API). For example, here
@@ -32,9 +38,6 @@ def concurrency(
32
38
  Used if the unique key isn't human readable -- e.g. includes
33
39
  api tokens or account ids so that the more readable `name`
34
40
  can be presented to users e.g in console UI>
35
-
36
- Returns:
37
- Asyncio Semaphore for concurrency context.
38
41
  """
39
42
  # sort out key
40
43
  key = key if key else name
@@ -47,8 +50,11 @@ def concurrency(
47
50
  )
48
51
  _concurrency_semaphores[key] = semaphore
49
52
 
50
- # return the semaphore
51
- return semaphore.semaphore
53
+ # wait and yield to protected code
54
+ start_wait = time.monotonic()
55
+ async with semaphore.semaphore:
56
+ report_sample_waiting_time(time.monotonic() - start_wait)
57
+ yield
52
58
 
53
59
 
54
60
  def concurrency_status() -> dict[str, tuple[int, int]]:
@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
5
5
  from shortuuid import uuid
6
6
 
7
7
  from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
8
+ from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
8
9
 
9
10
  from .environment import (
10
11
  SampleCleanup,
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
132
133
  # verify that there is at least one environment and a 'default' env
133
134
  validate_sandbox_environments(sandboxenv_type, environments)
134
135
 
136
+ # proxy environments (for recording SandboxEvent)
137
+ environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
138
+
135
139
  try:
136
140
  # copy files into environments
137
141
  await copy_sandbox_environment_files(files, environments)
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
148
152
  return environments
149
153
 
150
154
  except Exception as ex:
155
+ environments = unproxy_environments(environments)
151
156
  await sample_cleanup(task_name, config, environments, True)
152
157
  raise ex
153
158
 
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
161
166
  ) -> None:
162
167
  sandboxenv_type = registry_find_sandboxenv(type)
163
168
  sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
169
+ environments = unproxy_environments(environments)
164
170
  await sample_cleanup(task_name, config, environments, interrupted)
165
171
 
166
172
 
173
+ def unproxy_environments(
174
+ environments: dict[str, SandboxEnvironment],
175
+ ) -> dict[str, SandboxEnvironment]:
176
+ return {
177
+ k: v._sandbox
178
+ for k, v in cast(dict[str, SandboxEnvironmentProxy], environments).items()
179
+ }
180
+
181
+
167
182
  async def copy_sandbox_environment_files(
168
183
  files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
169
184
  ) -> None:
@@ -5,7 +5,7 @@ import os
5
5
  import tempfile
6
6
  from logging import getLogger
7
7
  from pathlib import Path, PurePosixPath
8
- from typing import Literal, Union, cast, overload
8
+ from typing import Literal, Union, overload
9
9
 
10
10
  from typing_extensions import override
11
11
 
@@ -145,7 +145,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
145
145
  project = await ComposeProject.create(
146
146
  name=task_project_name(task_name),
147
147
  config=config,
148
- sample_id=sample.id if sample is not None else None,
148
+ sample_id=sample.sample.id if sample is not None else None,
149
149
  epoch=sample.epoch if sample is not None else None,
150
150
  env=env,
151
151
  )
@@ -221,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
221
221
  # (this enables us to show output for the cleanup operation)
222
222
  if not interrupted:
223
223
  # extract project from first environment
224
- project = cast(
225
- DockerSandboxEnvironment, next(iter(environments.values()))
226
- )._project
224
+ project = (
225
+ next(iter(environments.values()))
226
+ .as_type(DockerSandboxEnvironment)
227
+ ._project
228
+ )
227
229
  # cleanup the project
228
230
  await project_cleanup(project=project, quiet=True)
229
231
 
@@ -2,12 +2,24 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  from dataclasses import dataclass, field
5
- from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
5
+ from typing import (
6
+ Any,
7
+ Awaitable,
8
+ Callable,
9
+ Literal,
10
+ NamedTuple,
11
+ Type,
12
+ TypeVar,
13
+ Union,
14
+ overload,
15
+ )
6
16
 
7
17
  from pydantic import BaseModel, Field
8
18
 
9
19
  from .._subprocess import ExecResult
10
20
 
21
+ ST = TypeVar("ST", bound="SandboxEnvironment")
22
+
11
23
  TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
12
24
  TaskCleanup = Callable[
13
25
  [str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
180
192
  """
181
193
  raise NotImplementedError("connection not implemented")
182
194
 
195
+ def as_type(self, sandbox_cls: Type[ST]) -> ST:
196
+ """Verify and return a reference to a subclass of SandboxEnvironment.
197
+
198
+ Args:
199
+ sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
200
+
201
+ Returns:
202
+ Reference to the sandbox using the requested type.
203
+
204
+ Raises:
205
+ TypeError: If the sandbox is not of the requested type.
206
+ """
207
+ if isinstance(self, sandbox_cls):
208
+ return self
209
+ else:
210
+ raise TypeError(
211
+ f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
212
+ )
213
+
183
214
  @classmethod
184
215
  def config_files(cls) -> list[str]:
185
216
  """Standard config files for this provider (used for automatic discovery)"""
@@ -0,0 +1,149 @@
1
+ import shlex
2
+ from typing import Literal, Type, Union, overload
3
+
4
+ from pydantic import JsonValue
5
+ from pydantic_core import to_jsonable_python
6
+ from typing_extensions import override
7
+
8
+ from inspect_ai._util.text import truncate_lines
9
+ from inspect_ai.util._subprocess import ExecResult
10
+
11
+ from .environment import (
12
+ ST,
13
+ SandboxConnection,
14
+ SandboxEnvironment,
15
+ SandboxEnvironmentConfigType,
16
+ )
17
+
18
+
19
+ class SandboxEnvironmentProxy(SandboxEnvironment):
20
+ def __init__(self, sandbox: SandboxEnvironment) -> None:
21
+ self._sandbox = sandbox
22
+
23
+ @override
24
+ async def exec(
25
+ self,
26
+ cmd: list[str],
27
+ input: str | bytes | None = None,
28
+ cwd: str | None = None,
29
+ env: dict[str, str] = {},
30
+ user: str | None = None,
31
+ timeout: int | None = None,
32
+ timeout_retry: bool = True,
33
+ ) -> ExecResult[str]:
34
+ from inspect_ai.log._transcript import SandboxEvent, transcript
35
+
36
+ # make call
37
+ result = await self._sandbox.exec(
38
+ cmd, input, cwd, env, user, timeout, timeout_retry
39
+ )
40
+
41
+ # yield event
42
+ options: dict[str, JsonValue] = {}
43
+ if cwd:
44
+ options["cwd"] = cwd
45
+ if env:
46
+ options["env"] = to_jsonable_python(env)
47
+ if user:
48
+ options["user"] = user
49
+ if timeout is not None:
50
+ options["timeout"] = timeout
51
+ if timeout_retry is not True:
52
+ options["timeout_retry"] = timeout_retry
53
+ transcript()._event(
54
+ SandboxEvent(
55
+ action="exec",
56
+ cmd=" ".join([shlex.quote(c) for c in cmd]),
57
+ input=content_display(input) if input is not None else None,
58
+ options=options,
59
+ result=result.returncode,
60
+ output=content_display(
61
+ f"{result.stderr}\n\n{result.stdout}"
62
+ if result.stderr
63
+ else result.stdout
64
+ ),
65
+ )
66
+ )
67
+
68
+ # return result
69
+ return result
70
+
71
+ @override
72
+ async def write_file(self, file: str, contents: str | bytes) -> None:
73
+ from inspect_ai.log._transcript import SandboxEvent, transcript
74
+
75
+ # make call
76
+ await self._sandbox.write_file(file, contents)
77
+
78
+ # yield event
79
+ transcript()._event(
80
+ SandboxEvent(
81
+ action="write_file", file=file, input=content_display(contents)
82
+ )
83
+ )
84
+
85
+ @overload
86
+ async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
87
+
88
+ @overload
89
+ async def read_file(self, file: str, text: Literal[False]) -> bytes: ...
90
+
91
+ @override
92
+ async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
93
+ from inspect_ai.log._transcript import SandboxEvent, transcript
94
+
95
+ # make call
96
+ if text is True:
97
+ output: str | bytes = await self._sandbox.read_file(file, True)
98
+ else:
99
+ output = await self._sandbox.read_file(file, False)
100
+
101
+ # yield event
102
+ transcript()._event(
103
+ SandboxEvent(action="read_file", file=file, output=content_display(output))
104
+ )
105
+
106
+ # return result
107
+ return output
108
+
109
+ @override
110
+ async def connection(self) -> SandboxConnection:
111
+ return await self._sandbox.connection()
112
+
113
+ @override
114
+ def as_type(self, sandbox_cls: Type[ST]) -> ST:
115
+ if isinstance(self._sandbox, sandbox_cls):
116
+ return self._sandbox
117
+ else:
118
+ raise TypeError(
119
+ f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
120
+ )
121
+
122
+ @classmethod
123
+ async def sample_cleanup(
124
+ cls,
125
+ task_name: str,
126
+ config: SandboxEnvironmentConfigType | None,
127
+ environments: dict[str, SandboxEnvironment],
128
+ interrupted: bool,
129
+ ) -> None:
130
+ pass
131
+
132
+
133
+ def content_display(content: str | bytes) -> str:
134
+ if isinstance(content, str):
135
+ content, truncated = truncate_lines(content, 20)
136
+ if truncated:
137
+ content = f"{content}\n\nOutput truncated ({truncated} additional lines)"
138
+ return content
139
+ else:
140
+ return f"binary ({pretty_size(len(content))})"
141
+
142
+
143
+ def pretty_size(size: int) -> str:
144
+ if size < 1024:
145
+ return f"{size} B"
146
+ if size < 1024 * 1024:
147
+ return f"{size / 1024:.2f} KB"
148
+
149
+ return f"{size / (1024 * 1024):.2f} MB"
@@ -1,7 +1,7 @@
1
1
  import tempfile
2
2
  import warnings
3
3
  from pathlib import Path
4
- from typing import Literal, Union, cast, overload
4
+ from typing import Literal, Union, overload
5
5
 
6
6
  from typing_extensions import override
7
7
 
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
40
40
  interrupted: bool,
41
41
  ) -> None:
42
42
  for environment in environments.values():
43
- env = cast(LocalSandboxEnvironment, environment)
44
- env.directory.cleanup()
43
+ sandbox = environment.as_type(LocalSandboxEnvironment)
44
+ sandbox.directory.cleanup()
45
45
 
46
46
  def __init__(self) -> None:
47
47
  self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.69
3
+ Version: 0.3.70
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -26,7 +26,7 @@ Requires-Dist: beautifulsoup4
26
26
  Requires-Dist: click>=8.1.3
27
27
  Requires-Dist: debugpy
28
28
  Requires-Dist: docstring-parser>=0.16
29
- Requires-Dist: fsspec>=2021.09.0
29
+ Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
30
30
  Requires-Dist: httpx
31
31
  Requires-Dist: ijson>=3.2.0
32
32
  Requires-Dist: jsonlines>=3.0.0
@@ -45,7 +45,7 @@ Requires-Dist: s3fs>=2023
45
45
  Requires-Dist: semver>=3.0.0
46
46
  Requires-Dist: shortuuid
47
47
  Requires-Dist: tenacity
48
- Requires-Dist: textual<=1.0.0,>=0.86.2
48
+ Requires-Dist: textual>=0.86.2
49
49
  Requires-Dist: typing_extensions>=4.9.0
50
50
  Requires-Dist: zipp>=3.19.1
51
51
  Provides-Extra: dev