inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/panel.py +1 -1
  8. inspect_ai/_display/core/progress.py +0 -6
  9. inspect_ai/_display/core/rich.py +1 -1
  10. inspect_ai/_display/rich/display.py +2 -2
  11. inspect_ai/_display/textual/app.py +15 -17
  12. inspect_ai/_display/textual/widgets/clock.py +3 -3
  13. inspect_ai/_display/textual/widgets/samples.py +6 -13
  14. inspect_ai/_eval/context.py +9 -1
  15. inspect_ai/_eval/run.py +16 -11
  16. inspect_ai/_eval/score.py +4 -10
  17. inspect_ai/_eval/task/results.py +5 -4
  18. inspect_ai/_eval/task/run.py +6 -12
  19. inspect_ai/_eval/task/task.py +10 -0
  20. inspect_ai/_util/ansi.py +31 -0
  21. inspect_ai/_util/datetime.py +1 -1
  22. inspect_ai/_util/deprecation.py +1 -1
  23. inspect_ai/_util/format.py +7 -0
  24. inspect_ai/_util/json.py +11 -1
  25. inspect_ai/_util/logger.py +14 -13
  26. inspect_ai/_util/throttle.py +10 -1
  27. inspect_ai/_util/trace.py +79 -47
  28. inspect_ai/_util/transcript.py +37 -4
  29. inspect_ai/_util/vscode.py +51 -0
  30. inspect_ai/_view/notify.py +2 -1
  31. inspect_ai/_view/www/.prettierrc.js +12 -0
  32. inspect_ai/_view/www/App.css +22 -1
  33. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  34. inspect_ai/_view/www/dist/assets/index.js +29752 -24492
  35. inspect_ai/_view/www/log-schema.json +262 -215
  36. inspect_ai/_view/www/package.json +1 -0
  37. inspect_ai/_view/www/src/App.mjs +19 -9
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  40. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  41. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  42. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  43. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  44. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  45. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  46. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  47. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  48. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  49. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  50. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  51. inspect_ai/_view/www/src/components/Tools.mjs +28 -5
  52. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  53. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  54. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  55. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  56. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  57. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  58. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  59. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  60. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  61. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  62. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  63. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  64. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  65. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  66. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  67. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  68. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  69. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +28 -20
  71. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  72. inspect_ai/_view/www/yarn.lock +44 -0
  73. inspect_ai/approval/_apply.py +4 -0
  74. inspect_ai/approval/_human/panel.py +5 -8
  75. inspect_ai/dataset/_dataset.py +51 -10
  76. inspect_ai/dataset/_util.py +31 -3
  77. inspect_ai/log/__init__.py +2 -0
  78. inspect_ai/log/_log.py +30 -2
  79. inspect_ai/log/_recorders/eval.py +2 -0
  80. inspect_ai/model/_call_tools.py +31 -7
  81. inspect_ai/model/_chat_message.py +3 -0
  82. inspect_ai/model/_model.py +42 -1
  83. inspect_ai/model/_providers/anthropic.py +4 -0
  84. inspect_ai/model/_providers/google.py +24 -6
  85. inspect_ai/model/_providers/openai.py +17 -3
  86. inspect_ai/model/_providers/openai_o1.py +10 -12
  87. inspect_ai/model/_render.py +9 -2
  88. inspect_ai/scorer/_metric.py +12 -1
  89. inspect_ai/solver/__init__.py +2 -0
  90. inspect_ai/solver/_human_agent/agent.py +83 -0
  91. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  92. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  93. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  94. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  95. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  96. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  97. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  98. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  99. inspect_ai/solver/_human_agent/install.py +222 -0
  100. inspect_ai/solver/_human_agent/panel.py +252 -0
  101. inspect_ai/solver/_human_agent/service.py +45 -0
  102. inspect_ai/solver/_human_agent/state.py +55 -0
  103. inspect_ai/solver/_human_agent/view.py +24 -0
  104. inspect_ai/solver/_task_state.py +28 -2
  105. inspect_ai/tool/_tool.py +10 -2
  106. inspect_ai/tool/_tool_info.py +2 -1
  107. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  108. inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
  109. inspect_ai/util/__init__.py +12 -4
  110. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  111. inspect_ai/util/_panel.py +31 -9
  112. inspect_ai/util/_sandbox/__init__.py +0 -3
  113. inspect_ai/util/_sandbox/context.py +5 -1
  114. inspect_ai/util/_sandbox/docker/compose.py +17 -13
  115. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  116. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  117. inspect_ai/util/_sandbox/docker/util.py +3 -2
  118. inspect_ai/util/_sandbox/environment.py +6 -5
  119. inspect_ai/util/_sandbox/local.py +1 -1
  120. inspect_ai/util/_sandbox/self_check.py +18 -18
  121. inspect_ai/util/_sandbox/service.py +22 -7
  122. inspect_ai/util/_store.py +7 -8
  123. inspect_ai/util/_store_model.py +110 -0
  124. inspect_ai/util/_subprocess.py +3 -3
  125. inspect_ai/util/_throttle.py +32 -0
  126. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  127. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
  128. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  129. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  130. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  131. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import json
3
+ from logging import getLogger
3
4
  from pathlib import PurePosixPath
4
5
  from textwrap import dedent
5
6
  from typing import (
@@ -14,9 +15,12 @@ from inspect_ai.util._subprocess import ExecResult
14
15
 
15
16
  from .environment import SandboxEnvironment
16
17
 
18
+ logger = getLogger(__name__)
19
+
20
+
17
21
  REQUESTS_DIR = "requests"
18
22
  RESPONSES_DIR = "responses"
19
- SERVICES_DIR = "/tmp/inspect-sandbox-services"
23
+ SERVICES_DIR = "/var/tmp/sandbox-services"
20
24
 
21
25
  ID = "id"
22
26
  METHOD = "method"
@@ -70,7 +74,7 @@ class SandboxService:
70
74
 
71
75
  ```python
72
76
  import sys
73
- sys.path.append("/tmp/inspect-sandbox-services/foo")
77
+ sys.path.append("/var/tmp/sandbox-services/foo")
74
78
  import foo
75
79
  ```
76
80
 
@@ -79,7 +83,7 @@ class SandboxService:
79
83
  ```python
80
84
  import importlib.util
81
85
  spec = importlib.util.spec_from_file_location(
82
- "foo", "/tmp/inspect-sandbox-services/foo/foo.py"
86
+ "foo", "/var/tmp/sandbox-services/foo/foo.py"
83
87
  )
84
88
  foo = importlib.util.module_from_spec(spec)
85
89
  spec.loader.exec_module(foo)
@@ -150,8 +154,14 @@ class SandboxService:
150
154
  f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
151
155
  )
152
156
 
153
- # parse request
154
- request_data = json.loads(result.stdout)
157
+ # parse request (decode error could occur if it's incomplete so bypass this)
158
+ try:
159
+ request_data = json.loads(result.stdout)
160
+ except json.JSONDecodeError:
161
+ logger.warning(
162
+ f"JSON decoding error reading service request: {result.stdout}"
163
+ )
164
+ return None
155
165
  if not isinstance(request_data, dict):
156
166
  raise TypeError(f"Service request is not a dict (type={request_data})")
157
167
 
@@ -275,7 +285,7 @@ class SandboxService:
275
285
  return request_id
276
286
 
277
287
  def _read_{self._name}_response(request_id: str) -> tuple[bool, Any]:
278
- from json import load
288
+ from json import JSONDecodeError, load
279
289
  from pathlib import Path
280
290
 
281
291
  responses_dir = Path("{SERVICES_DIR}", "{self._name}", "{RESPONSES_DIR}")
@@ -283,7 +293,12 @@ class SandboxService:
283
293
  if response_path.exists():
284
294
  # read and remove the file
285
295
  with open(response_path, "r") as f:
286
- response = load(f)
296
+ # it's possible the file is still being written so
297
+ # just catch and wait for another retry if this occurs
298
+ try:
299
+ response = load(f)
300
+ except JSONDecodeError:
301
+ return False, None
287
302
  response_path.unlink()
288
303
 
289
304
  # raise error if we have one
inspect_ai/util/_store.py CHANGED
@@ -34,18 +34,14 @@ class Store:
34
34
  inheriting from Pydantic `BaseModel`)
35
35
  """
36
36
 
37
- def __init__(self) -> None:
38
- self._data: dict[str, Any] = {}
37
+ def __init__(self, data: dict[str, Any] | None = None) -> None:
38
+ self._data = deepcopy(data) if data else {}
39
39
 
40
40
  @overload
41
- def get(self, key: str, default: None = None) -> Any:
42
- return self._data.get(key, default)
41
+ def get(self, key: str, default: None = None) -> Any: ...
43
42
 
44
43
  @overload
45
- def get(self, key: str, default: VT) -> VT:
46
- if key not in self._data.keys():
47
- self._data[key] = default
48
- return cast(VT, self._data.get(key, default))
44
+ def get(self, key: str, default: VT) -> VT: ...
49
45
 
50
46
  def get(self, key: str, default: VT | None = None) -> VT | Any:
51
47
  """Get a value from the store.
@@ -60,6 +56,9 @@ class Store:
60
56
  Returns:
61
57
  Value if it exists, otherwise default.
62
58
  """
59
+ if default is not None:
60
+ if key not in self._data.keys():
61
+ self._data[key] = default
63
62
  return cast(VT, self._data.get(key, default))
64
63
 
65
64
  def set(self, key: str, value: Any) -> None:
@@ -0,0 +1,110 @@
1
+ from typing import Any, Type, TypeVar
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
+ from ._store import Store, store
6
+
7
+
8
+ class StoreModel(BaseModel):
9
+ """Store backed Pydantic BaseModel.
10
+
11
+ The model is initialised from a Store, so that Store should
12
+ either already satisfy the validation constraints of the model
13
+ OR you should provide Field(default=) annotations for all of
14
+ your model fields (the latter approach is recommended).
15
+ """
16
+
17
+ store: Store = Field(exclude=True, default_factory=store)
18
+
19
+ def model_post_init(self, __context: Any) -> None:
20
+ for name in self.model_fields.keys():
21
+ if name == "store":
22
+ continue
23
+ # if it's in the store, then have our dict reflect that
24
+ ns_name = self._ns_name(name)
25
+ if ns_name in self.store:
26
+ self.__dict__[name] = self.store.get(ns_name)
27
+ # if it's not in the store, then reflect dict into store
28
+ elif name in self.__dict__.keys():
29
+ self.store.set(ns_name, self.__dict__[name])
30
+
31
+ def __getattribute__(self, name: str) -> Any:
32
+ # sidestep dunders and pydantic fields
33
+ if name.startswith("__") or name.startswith("model_"):
34
+ return object.__getattribute__(self, name)
35
+ # handle model_fields (except 'store') by reading the store
36
+ elif name in object.__getattribute__(self, "model_fields") and name != "store":
37
+ store_key = self._ns_name(name)
38
+ if store_key in self.store:
39
+ return self.store.get(store_key)
40
+ else:
41
+ return object.__getattribute__(self, name)
42
+ # default to super
43
+ else:
44
+ return super().__getattribute__(name)
45
+
46
+ def __setattr__(self, name: str, value: Any) -> None:
47
+ if name in self.model_fields:
48
+ # validate with the new value (can throw ValidationError)
49
+ temp_data = self.store._data.copy()
50
+ temp_data[self._ns_name(name)] = value
51
+ self._validate_store(temp_data)
52
+
53
+ # update the store and sync the underlying __dict__
54
+ self.store.set(self._ns_name(name), value)
55
+ self.__dict__[name] = value
56
+ else:
57
+ super().__setattr__(name, value)
58
+
59
+ def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
60
+ self._sync_model() # in case store was updated behind our back
61
+ return super().model_dump(*args, **kwargs)
62
+
63
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> str:
64
+ self._sync_model() # in case store was updated behind our back
65
+ return super().model_dump_json(*args, **kwargs)
66
+
67
+ def _sync_model(self) -> None:
68
+ self._validate_store()
69
+ for field_name in self.model_fields.keys():
70
+ if field_name == "store":
71
+ continue
72
+ store_value = self.store.get(self._ns_name(field_name))
73
+ self.__dict__[field_name] = store_value
74
+
75
+ def _validate_store(self, data: dict[str, Any] | None = None) -> None:
76
+ # validate store or custom dict
77
+ data = data if data is not None else self.store._data
78
+
79
+ # pick out keys to validate
80
+ validate: dict[str, Any] = {}
81
+ for k, v in data.items():
82
+ if k.startswith(f"{self.__class__.__name__}:"):
83
+ unprefixed = self._un_ns_name(k)
84
+ validate[unprefixed] = v
85
+
86
+ # perform validation
87
+ self.__class__.model_validate(validate)
88
+
89
+ def _ns_name(self, name: str) -> str:
90
+ return f"{self.__class__.__name__}:{name}"
91
+
92
+ def _un_ns_name(self, name: str) -> str:
93
+ return name.replace(f"{self.__class__.__name__}:", "", 1)
94
+
95
+ model_config = ConfigDict(arbitrary_types_allowed=True)
96
+
97
+
98
+ SMT = TypeVar("SMT", bound=StoreModel)
99
+
100
+
101
+ def store_as(model_cls: Type[SMT]) -> SMT:
102
+ """Get a Pydantic model interface to the store.
103
+
104
+ Args:
105
+ model_cls: Pydantic model type (must derive from StoreModel)
106
+
107
+ Returns:
108
+ StoreModel: Instance of model_cls bound to current Store.
109
+ """
110
+ return model_cls(store=store())
@@ -101,9 +101,9 @@ async def subprocess(
101
101
  input = input.encode() if isinstance(input, str) else input
102
102
 
103
103
  # function to run command (we may or may not run it w/ concurrency)
104
- async def run_command() -> (
105
- AsyncGenerator[Union[Process, ExecResult[str], ExecResult[bytes]], None]
106
- ):
104
+ async def run_command() -> AsyncGenerator[
105
+ Union[Process, ExecResult[str], ExecResult[bytes]], None
106
+ ]:
107
107
  if isinstance(args, str):
108
108
  proc = await asyncio.create_subprocess_shell(
109
109
  args,
@@ -0,0 +1,32 @@
1
+ import time
2
+ from functools import wraps
3
+ from typing import Any, Callable
4
+
5
+
6
+ def throttle(seconds: float) -> Callable[..., Any]:
7
+ """Throttle a function to ensure it is called no more than every n seconds.
8
+
9
+ Args:
10
+ seconds (float): Throttle time.
11
+
12
+ Returns:
13
+ Callable: Throttled function.
14
+ """
15
+
16
+ def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
17
+ last_called: float = 0
18
+ last_result: Any = None
19
+
20
+ @wraps(func)
21
+ def wrapped(*args: Any, **kwargs: Any) -> Any:
22
+ nonlocal last_called
23
+ nonlocal last_result
24
+ current_time = time.time()
25
+ if current_time - last_called >= seconds:
26
+ last_result = func(*args, **kwargs)
27
+ last_called = current_time
28
+ return last_result
29
+
30
+ return wrapped
31
+
32
+ return decorator
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.55
3
+ Version: 0.3.57
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Safety Institute
6
6
  License: MIT License
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
67
67
  Requires-Dist: pytest-cov; extra == "dev"
68
68
  Requires-Dist: pytest-dotenv; extra == "dev"
69
69
  Requires-Dist: pytest-xdist; extra == "dev"
70
- Requires-Dist: ruff==0.8.4; extra == "dev"
70
+ Requires-Dist: ruff==0.9.0; extra == "dev"
71
71
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
72
72
  Requires-Dist: types-PyYAML; extra == "dev"
73
73
  Requires-Dist: types-beautifulsoup4; extra == "dev"