inspect-ai 0.3.55__py3-none-any.whl → 0.3.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +1 -0
- inspect_ai/_cli/common.py +1 -1
- inspect_ai/_cli/trace.py +33 -20
- inspect_ai/_display/core/active.py +1 -1
- inspect_ai/_display/core/display.py +1 -1
- inspect_ai/_display/core/footer.py +1 -1
- inspect_ai/_display/core/progress.py +0 -6
- inspect_ai/_display/core/rich.py +1 -1
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/app.py +15 -17
- inspect_ai/_display/textual/widgets/clock.py +3 -3
- inspect_ai/_display/textual/widgets/samples.py +6 -13
- inspect_ai/_eval/context.py +9 -1
- inspect_ai/_eval/score.py +4 -10
- inspect_ai/_eval/task/results.py +5 -4
- inspect_ai/_eval/task/run.py +6 -12
- inspect_ai/_eval/task/task.py +10 -0
- inspect_ai/_util/ansi.py +31 -0
- inspect_ai/_util/format.py +7 -0
- inspect_ai/_util/logger.py +12 -12
- inspect_ai/_util/throttle.py +10 -1
- inspect_ai/_util/trace.py +43 -47
- inspect_ai/_util/transcript.py +4 -0
- inspect_ai/_util/vscode.py +51 -0
- inspect_ai/_view/notify.py +2 -1
- inspect_ai/_view/www/App.css +22 -1
- inspect_ai/_view/www/dist/assets/index.css +2374 -2
- inspect_ai/_view/www/dist/assets/index.js +29622 -24424
- inspect_ai/_view/www/log-schema.json +138 -90
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/App.mjs +1 -0
- inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
- inspect_ai/_view/www/src/components/Tools.mjs +11 -3
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
- inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +26 -12
- inspect_ai/_view/www/yarn.lock +44 -0
- inspect_ai/approval/_apply.py +4 -0
- inspect_ai/approval/_human/panel.py +5 -8
- inspect_ai/dataset/_dataset.py +51 -10
- inspect_ai/dataset/_util.py +31 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +5 -2
- inspect_ai/model/_call_tools.py +4 -2
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +42 -1
- inspect_ai/model/_providers/anthropic.py +4 -0
- inspect_ai/model/_render.py +9 -2
- inspect_ai/scorer/_metric.py +12 -1
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_human_agent/agent.py +83 -0
- inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
- inspect_ai/solver/_human_agent/commands/clock.py +70 -0
- inspect_ai/solver/_human_agent/commands/command.py +59 -0
- inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
- inspect_ai/solver/_human_agent/commands/note.py +42 -0
- inspect_ai/solver/_human_agent/commands/score.py +80 -0
- inspect_ai/solver/_human_agent/commands/status.py +62 -0
- inspect_ai/solver/_human_agent/commands/submit.py +151 -0
- inspect_ai/solver/_human_agent/install.py +222 -0
- inspect_ai/solver/_human_agent/panel.py +252 -0
- inspect_ai/solver/_human_agent/service.py +45 -0
- inspect_ai/solver/_human_agent/state.py +55 -0
- inspect_ai/solver/_human_agent/view.py +24 -0
- inspect_ai/solver/_task_state.py +28 -2
- inspect_ai/tool/_tool.py +10 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
- inspect_ai/util/__init__.py +8 -4
- inspect_ai/{_util/display.py → util/_display.py} +6 -0
- inspect_ai/util/_panel.py +31 -9
- inspect_ai/util/_sandbox/__init__.py +0 -3
- inspect_ai/util/_sandbox/context.py +5 -1
- inspect_ai/util/_sandbox/docker/compose.py +16 -10
- inspect_ai/util/_sandbox/docker/docker.py +9 -6
- inspect_ai/util/_sandbox/docker/internal.py +1 -1
- inspect_ai/util/_sandbox/docker/util.py +2 -2
- inspect_ai/util/_sandbox/environment.py +6 -5
- inspect_ai/util/_sandbox/local.py +1 -1
- inspect_ai/util/_sandbox/service.py +22 -7
- inspect_ai/util/_store.py +5 -6
- inspect_ai/util/_store_model.py +110 -0
- inspect_ai/util/_throttle.py +32 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/RECORD +95 -73
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/top_level.txt +0 -0
inspect_ai/_util/trace.py
CHANGED
@@ -8,10 +8,10 @@ import shutil
|
|
8
8
|
import time
|
9
9
|
import traceback
|
10
10
|
from contextlib import contextmanager
|
11
|
-
from
|
12
|
-
from logging
|
11
|
+
from dataclasses import dataclass
|
12
|
+
from logging import FileHandler, Logger
|
13
13
|
from pathlib import Path
|
14
|
-
from typing import Any, Generator, Literal, TextIO
|
14
|
+
from typing import Any, Callable, Generator, Literal, TextIO
|
15
15
|
|
16
16
|
import jsonlines
|
17
17
|
from pydantic import BaseModel, Field, JsonValue
|
@@ -25,6 +25,10 @@ def inspect_trace_dir() -> Path:
|
|
25
25
|
return inspect_data_dir("traces")
|
26
26
|
|
27
27
|
|
28
|
+
def inspect_trace_file() -> Path:
|
29
|
+
return inspect_trace_dir() / f"trace-{os.getpid()}.log"
|
30
|
+
|
31
|
+
|
28
32
|
@contextmanager
|
29
33
|
def trace_action(
|
30
34
|
logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
|
@@ -32,7 +36,6 @@ def trace_action(
|
|
32
36
|
trace_id = uuid()
|
33
37
|
start_monotonic = time.monotonic()
|
34
38
|
start_wall = time.time()
|
35
|
-
pid = os.getpid()
|
36
39
|
detail = message % args if args else message % kwargs if kwargs else message
|
37
40
|
|
38
41
|
def trace_message(event: str) -> str:
|
@@ -47,7 +50,6 @@ def trace_action(
|
|
47
50
|
"event": "enter",
|
48
51
|
"trace_id": str(trace_id),
|
49
52
|
"start_time": start_wall,
|
50
|
-
"pid": pid,
|
51
53
|
},
|
52
54
|
)
|
53
55
|
|
@@ -63,7 +65,6 @@ def trace_action(
|
|
63
65
|
"event": "exit",
|
64
66
|
"trace_id": str(trace_id),
|
65
67
|
"duration": duration,
|
66
|
-
"pid": pid,
|
67
68
|
},
|
68
69
|
)
|
69
70
|
except (KeyboardInterrupt, asyncio.CancelledError):
|
@@ -77,7 +78,6 @@ def trace_action(
|
|
77
78
|
"event": "cancel",
|
78
79
|
"trace_id": str(trace_id),
|
79
80
|
"duration": duration,
|
80
|
-
"pid": pid,
|
81
81
|
},
|
82
82
|
)
|
83
83
|
raise
|
@@ -92,7 +92,6 @@ def trace_action(
|
|
92
92
|
"event": "timeout",
|
93
93
|
"trace_id": str(trace_id),
|
94
94
|
"duration": duration,
|
95
|
-
"pid": pid,
|
96
95
|
},
|
97
96
|
)
|
98
97
|
raise
|
@@ -110,7 +109,6 @@ def trace_action(
|
|
110
109
|
"error": getattr(ex, "message", str(ex)) or repr(ex),
|
111
110
|
"error_type": type(ex).__name__,
|
112
111
|
"stacktrace": traceback.format_exc(),
|
113
|
-
"pid": pid,
|
114
112
|
},
|
115
113
|
)
|
116
114
|
raise
|
@@ -153,7 +151,6 @@ class TraceFormatter(logging.Formatter):
|
|
153
151
|
"error",
|
154
152
|
"error_type",
|
155
153
|
"stacktrace",
|
156
|
-
"pid",
|
157
154
|
]:
|
158
155
|
if hasattr(record, key):
|
159
156
|
output[key] = getattr(record, key)
|
@@ -215,7 +212,22 @@ class ActionTraceRecord(TraceRecord):
|
|
215
212
|
error: str | None = Field(default=None)
|
216
213
|
error_type: str | None = Field(default=None)
|
217
214
|
stacktrace: str | None = Field(default=None)
|
218
|
-
|
215
|
+
|
216
|
+
|
217
|
+
@dataclass
|
218
|
+
class TraceFile:
|
219
|
+
file: Path
|
220
|
+
mtime: float
|
221
|
+
|
222
|
+
|
223
|
+
def list_trace_files() -> list[TraceFile]:
|
224
|
+
trace_files: list[TraceFile] = [
|
225
|
+
TraceFile(file=f, mtime=f.lstat().st_mtime)
|
226
|
+
for f in inspect_trace_dir().iterdir()
|
227
|
+
if f.is_file()
|
228
|
+
]
|
229
|
+
trace_files.sort(key=lambda f: f.mtime, reverse=True)
|
230
|
+
return trace_files
|
219
231
|
|
220
232
|
|
221
233
|
def read_trace_file(file: Path) -> list[TraceRecord]:
|
@@ -237,39 +249,23 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
|
|
237
249
|
return read_file(f)
|
238
250
|
|
239
251
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
The modified filename with .gz extension
|
261
|
-
"""
|
262
|
-
return default_name + ".gz"
|
263
|
-
|
264
|
-
def rotate(self, source: str, dest: str) -> None:
|
265
|
-
"""
|
266
|
-
Compresses the source file and moves it to destination.
|
267
|
-
|
268
|
-
Args:
|
269
|
-
source: The source file to be compressed
|
270
|
-
dest: The destination path for the compressed file
|
271
|
-
"""
|
272
|
-
with open(source, "rb") as f_in:
|
273
|
-
with gzip.open(dest, "wb") as f_out:
|
274
|
-
shutil.copyfileobj(f_in, f_out)
|
275
|
-
os.remove(source)
|
252
|
+
def rotate_trace_files() -> None:
|
253
|
+
rotate_files = list_trace_files()[10:]
|
254
|
+
for file in rotate_files:
|
255
|
+
file.file.unlink(missing_ok=True)
|
256
|
+
|
257
|
+
|
258
|
+
def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:
|
259
|
+
def compress() -> None:
|
260
|
+
# ensure log is closed
|
261
|
+
log_handler.close()
|
262
|
+
|
263
|
+
# compress
|
264
|
+
trace_file = Path(log_handler.baseFilename)
|
265
|
+
if trace_file.exists():
|
266
|
+
with open(trace_file, "rb") as f_in:
|
267
|
+
with gzip.open(trace_file.with_suffix(".log.gz"), "wb") as f_out:
|
268
|
+
shutil.copyfileobj(f_in, f_out)
|
269
|
+
trace_file.unlink()
|
270
|
+
|
271
|
+
return compress
|
inspect_ai/_util/transcript.py
CHANGED
@@ -89,6 +89,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
|
|
89
89
|
return transcript_markdown("```python\n" + call + "\n```\n")
|
90
90
|
|
91
91
|
|
92
|
+
DOUBLE_LINE = Box(
|
93
|
+
" ══ \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n"
|
94
|
+
)
|
95
|
+
|
92
96
|
LINE = Box(" ── \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
|
93
97
|
|
94
98
|
DOTTED = Box(" ·· \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
from pydantic import BaseModel, Field
|
6
|
+
from pydantic_core import to_json
|
7
|
+
from shortuuid import uuid
|
8
|
+
|
9
|
+
from .appdirs import inspect_data_dir
|
10
|
+
|
11
|
+
|
12
|
+
class VSCodeCommand(BaseModel):
|
13
|
+
command: str
|
14
|
+
args: list[Any] = Field(default_factory=list)
|
15
|
+
|
16
|
+
|
17
|
+
def execute_vscode_commands(commands: VSCodeCommand | list[VSCodeCommand]) -> None:
|
18
|
+
# resolve to list
|
19
|
+
commands = commands if isinstance(commands, list) else [commands]
|
20
|
+
|
21
|
+
# ensure there is someone listening
|
22
|
+
command_dir = vs_code_commands_dir()
|
23
|
+
if command_dir is None:
|
24
|
+
raise NotImplementedError(
|
25
|
+
"Not running in VS Code session or have older version of Inspect AI extension"
|
26
|
+
)
|
27
|
+
|
28
|
+
command_file = command_dir / uuid()
|
29
|
+
with open(command_file, "w") as f:
|
30
|
+
f.write(to_json(commands).decode())
|
31
|
+
|
32
|
+
|
33
|
+
def can_execute_vscode_commands() -> bool:
|
34
|
+
return vs_code_commands_dir() is not None
|
35
|
+
|
36
|
+
|
37
|
+
def vs_code_commands_dir() -> Path | None:
|
38
|
+
workspace_id = vscode_workspace_id()
|
39
|
+
if workspace_id:
|
40
|
+
workspace_dir = inspect_data_dir(os.path.join("vscode", workspace_id))
|
41
|
+
if workspace_dir.exists():
|
42
|
+
commands_dir = workspace_dir / "commands"
|
43
|
+
return commands_dir if commands_dir.exists() else None
|
44
|
+
else:
|
45
|
+
return None
|
46
|
+
else:
|
47
|
+
return None
|
48
|
+
|
49
|
+
|
50
|
+
def vscode_workspace_id() -> str | None:
|
51
|
+
return os.environ.get("INSPECT_WORKSPACE_ID", None)
|
inspect_ai/_view/notify.py
CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
4
4
|
from urllib.parse import urlparse
|
5
5
|
|
6
6
|
from inspect_ai._util.appdirs import inspect_data_dir
|
7
|
+
from inspect_ai._util.vscode import vscode_workspace_id
|
7
8
|
|
8
9
|
# lightweight tracking of when the last eval task completed
|
9
10
|
# this enables the view client to poll for changes frequently
|
@@ -24,7 +25,7 @@ def view_notify_eval(location: str) -> None:
|
|
24
25
|
payload = {
|
25
26
|
"location": location,
|
26
27
|
}
|
27
|
-
workspace_id =
|
28
|
+
workspace_id = vscode_workspace_id()
|
28
29
|
if workspace_id:
|
29
30
|
payload["workspace_id"] = workspace_id
|
30
31
|
|
inspect_ai/_view/www/App.css
CHANGED
@@ -235,6 +235,10 @@ body[class^="vscode-"] .sidebar .list-group {
|
|
235
235
|
--bs-list-group-active-color: var(--vscode-sideBarSectionHeader-foreground);
|
236
236
|
}
|
237
237
|
|
238
|
+
body[class^="vscode-"] div.ap-control-bar .ap-fullscreen-button {
|
239
|
+
display: none;
|
240
|
+
}
|
241
|
+
|
238
242
|
:root {
|
239
243
|
--bs-navbar-padding-y: 0;
|
240
244
|
--bs-navbar-brand-padding-y: 0;
|
@@ -690,7 +694,7 @@ table.table.table-sm td {
|
|
690
694
|
}
|
691
695
|
|
692
696
|
@keyframes moveLeftToRight {
|
693
|
-
|
697
|
+
from {
|
694
698
|
margin-left: 0;
|
695
699
|
}
|
696
700
|
to {
|
@@ -724,6 +728,23 @@ pre[class*="language-"].tool-output {
|
|
724
728
|
border-radius: var(--bs-border-radius) !important;
|
725
729
|
}
|
726
730
|
|
731
|
+
/* lightbox styles */
|
732
|
+
|
733
|
+
|
734
|
+
.lightbox-overlay .close-button,
|
735
|
+
.lightbox-overlay .nav-button {
|
736
|
+
/* Hide by default */
|
737
|
+
opacity: 0;
|
738
|
+
pointer-events: none; /* so it doesn't register clicks when hidden */
|
739
|
+
transition: opacity 0.3s ease;
|
740
|
+
}
|
741
|
+
|
742
|
+
.lightbox-overlay:hover .close-button,
|
743
|
+
.lightbox-overlay .nav-button {
|
744
|
+
/* Show on hover */
|
745
|
+
opacity: 1;
|
746
|
+
pointer-events: auto;
|
747
|
+
}
|
727
748
|
|
728
749
|
/* jsondiffpatch */
|
729
750
|
|