inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +1 -0
- inspect_ai/_cli/common.py +1 -1
- inspect_ai/_cli/trace.py +33 -20
- inspect_ai/_display/core/active.py +1 -1
- inspect_ai/_display/core/display.py +1 -1
- inspect_ai/_display/core/footer.py +1 -1
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_display/core/progress.py +0 -6
- inspect_ai/_display/core/rich.py +1 -1
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/app.py +15 -17
- inspect_ai/_display/textual/widgets/clock.py +3 -3
- inspect_ai/_display/textual/widgets/samples.py +6 -13
- inspect_ai/_eval/context.py +9 -1
- inspect_ai/_eval/run.py +16 -11
- inspect_ai/_eval/score.py +4 -10
- inspect_ai/_eval/task/results.py +5 -4
- inspect_ai/_eval/task/run.py +6 -12
- inspect_ai/_eval/task/task.py +10 -0
- inspect_ai/_util/ansi.py +31 -0
- inspect_ai/_util/datetime.py +1 -1
- inspect_ai/_util/deprecation.py +1 -1
- inspect_ai/_util/format.py +7 -0
- inspect_ai/_util/json.py +11 -1
- inspect_ai/_util/logger.py +14 -13
- inspect_ai/_util/throttle.py +10 -1
- inspect_ai/_util/trace.py +79 -47
- inspect_ai/_util/transcript.py +37 -4
- inspect_ai/_util/vscode.py +51 -0
- inspect_ai/_view/notify.py +2 -1
- inspect_ai/_view/www/.prettierrc.js +12 -0
- inspect_ai/_view/www/App.css +22 -1
- inspect_ai/_view/www/dist/assets/index.css +2374 -2
- inspect_ai/_view/www/dist/assets/index.js +29752 -24492
- inspect_ai/_view/www/log-schema.json +262 -215
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/App.mjs +19 -9
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/api/Types.mjs +15 -4
- inspect_ai/_view/www/src/api/api-http.mjs +2 -0
- inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
- inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
- inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
- inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +28 -5
- inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
- inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
- inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
- inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
- inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
- inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +28 -20
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
- inspect_ai/_view/www/yarn.lock +44 -0
- inspect_ai/approval/_apply.py +4 -0
- inspect_ai/approval/_human/panel.py +5 -8
- inspect_ai/dataset/_dataset.py +51 -10
- inspect_ai/dataset/_util.py +31 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +30 -2
- inspect_ai/log/_recorders/eval.py +2 -0
- inspect_ai/model/_call_tools.py +31 -7
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +42 -1
- inspect_ai/model/_providers/anthropic.py +4 -0
- inspect_ai/model/_providers/google.py +24 -6
- inspect_ai/model/_providers/openai.py +17 -3
- inspect_ai/model/_providers/openai_o1.py +10 -12
- inspect_ai/model/_render.py +9 -2
- inspect_ai/scorer/_metric.py +12 -1
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_human_agent/agent.py +83 -0
- inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
- inspect_ai/solver/_human_agent/commands/clock.py +70 -0
- inspect_ai/solver/_human_agent/commands/command.py +59 -0
- inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
- inspect_ai/solver/_human_agent/commands/note.py +42 -0
- inspect_ai/solver/_human_agent/commands/score.py +80 -0
- inspect_ai/solver/_human_agent/commands/status.py +62 -0
- inspect_ai/solver/_human_agent/commands/submit.py +151 -0
- inspect_ai/solver/_human_agent/install.py +222 -0
- inspect_ai/solver/_human_agent/panel.py +252 -0
- inspect_ai/solver/_human_agent/service.py +45 -0
- inspect_ai/solver/_human_agent/state.py +55 -0
- inspect_ai/solver/_human_agent/view.py +24 -0
- inspect_ai/solver/_task_state.py +28 -2
- inspect_ai/tool/_tool.py +10 -2
- inspect_ai/tool/_tool_info.py +2 -1
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
- inspect_ai/util/__init__.py +12 -4
- inspect_ai/{_util/display.py → util/_display.py} +6 -0
- inspect_ai/util/_panel.py +31 -9
- inspect_ai/util/_sandbox/__init__.py +0 -3
- inspect_ai/util/_sandbox/context.py +5 -1
- inspect_ai/util/_sandbox/docker/compose.py +17 -13
- inspect_ai/util/_sandbox/docker/docker.py +9 -6
- inspect_ai/util/_sandbox/docker/internal.py +1 -1
- inspect_ai/util/_sandbox/docker/util.py +3 -2
- inspect_ai/util/_sandbox/environment.py +6 -5
- inspect_ai/util/_sandbox/local.py +1 -1
- inspect_ai/util/_sandbox/self_check.py +18 -18
- inspect_ai/util/_sandbox/service.py +22 -7
- inspect_ai/util/_store.py +7 -8
- inspect_ai/util/_store_model.py +110 -0
- inspect_ai/util/_subprocess.py +3 -3
- inspect_ai/util/_throttle.py +32 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
inspect_ai/_util/deprecation.py
CHANGED
@@ -174,7 +174,7 @@ def default_deprecation_msg(
|
|
174
174
|
|
175
175
|
_qual = getattr(obj, "__qualname__", "") or ""
|
176
176
|
if _qual.endswith(".__init__") or _qual.endswith(".__new__"):
|
177
|
-
_obj = f
|
177
|
+
_obj = f" class ({_qual.rsplit('.', 1)[0]})"
|
178
178
|
elif _qual and _obj:
|
179
179
|
_obj += f" ({_qual})"
|
180
180
|
|
inspect_ai/_util/format.py
CHANGED
@@ -26,3 +26,10 @@ def format_value(value: object, width: int) -> str:
|
|
26
26
|
elif isinstance(value, list | tuple | dict):
|
27
27
|
return pprint.pformat(value, width=width)
|
28
28
|
return str(value)
|
29
|
+
|
30
|
+
|
31
|
+
def format_progress_time(time: float, pad_hours: bool = True) -> str:
|
32
|
+
minutes, seconds = divmod(time, 60)
|
33
|
+
hours, minutes = divmod(minutes, 60)
|
34
|
+
hours_fmt = f"{hours:2.0f}" if pad_hours else f"{hours:.0f}"
|
35
|
+
return f"{hours_fmt}:{minutes:02.0f}:{seconds:02.0f}"
|
inspect_ai/_util/json.py
CHANGED
@@ -103,10 +103,20 @@ def json_changes(
|
|
103
103
|
paths = json_change.path.split("/")[1:]
|
104
104
|
replaced = before
|
105
105
|
for path in paths:
|
106
|
-
|
106
|
+
decoded_path = decode_json_pointer_segment(path)
|
107
|
+
index: Any = (
|
108
|
+
int(decoded_path) if decoded_path.isnumeric() else decoded_path
|
109
|
+
)
|
107
110
|
replaced = replaced[index]
|
108
111
|
json_change.replaced = replaced
|
109
112
|
changes.append(json_change)
|
110
113
|
return changes
|
111
114
|
else:
|
112
115
|
return None
|
116
|
+
|
117
|
+
|
118
|
+
def decode_json_pointer_segment(segment: str) -> str:
|
119
|
+
"""Decode a single JSON Pointer segment."""
|
120
|
+
# JSON Pointers encode ~ and / because they are special characters
|
121
|
+
# this decodes these values (https://www.rfc-editor.org/rfc/rfc6901)
|
122
|
+
return segment.replace("~1", "/").replace("~0", "~")
|
inspect_ai/_util/logger.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
import atexit
|
1
2
|
import os
|
3
|
+
import re
|
2
4
|
from logging import (
|
3
5
|
DEBUG,
|
4
6
|
INFO,
|
@@ -30,7 +32,12 @@ from .constants import (
|
|
30
32
|
TRACE_LOG_LEVEL,
|
31
33
|
)
|
32
34
|
from .error import PrerequisiteError
|
33
|
-
from .trace import
|
35
|
+
from .trace import (
|
36
|
+
TraceFormatter,
|
37
|
+
compress_trace_log,
|
38
|
+
inspect_trace_file,
|
39
|
+
rotate_trace_files,
|
40
|
+
)
|
34
41
|
|
35
42
|
TRACE_FILE_NAME = "trace.log"
|
36
43
|
|
@@ -56,19 +63,13 @@ class LogHandler(RichHandler):
|
|
56
63
|
else:
|
57
64
|
self.file_logger_level = 0
|
58
65
|
|
59
|
-
# add a trace handler
|
60
|
-
|
61
|
-
have_existing_trace_file = default_trace_file.exists()
|
66
|
+
# add a trace file handler
|
67
|
+
rotate_trace_files() # remove oldest if > 10 trace files
|
62
68
|
env_trace_file = os.environ.get("INSPECT_TRACE_FILE", None)
|
63
|
-
trace_file = Path(env_trace_file) if env_trace_file else
|
64
|
-
|
65
|
-
self.trace_logger = TraceFileHandler(
|
66
|
-
trace_file.as_posix(),
|
67
|
-
backupCount=trace_total_files - 1, # exclude the current file (10 total)
|
68
|
-
)
|
69
|
+
trace_file = Path(env_trace_file) if env_trace_file else inspect_trace_file()
|
70
|
+
self.trace_logger = FileHandler(trace_file)
|
69
71
|
self.trace_logger.setFormatter(TraceFormatter())
|
70
|
-
|
71
|
-
self.trace_logger.doRollover()
|
72
|
+
atexit.register(compress_trace_log(self.trace_logger))
|
72
73
|
|
73
74
|
# set trace level
|
74
75
|
trace_level = os.environ.get("INSPECT_TRACE_LEVEL", TRACE_LOG_LEVEL)
|
@@ -182,7 +183,7 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
|
|
182
183
|
if write:
|
183
184
|
transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
|
184
185
|
global _rate_limit_count
|
185
|
-
if (record.levelno <= INFO and "
|
186
|
+
if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
|
186
187
|
record.levelno == DEBUG
|
187
188
|
# See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
|
188
189
|
# for boto retry logic / log messages (this is tracking standard or adaptive retries)
|
inspect_ai/_util/throttle.py
CHANGED
@@ -3,7 +3,16 @@ from functools import wraps
|
|
3
3
|
from typing import Any, Callable
|
4
4
|
|
5
5
|
|
6
|
-
def throttle(seconds:
|
6
|
+
def throttle(seconds: float) -> Callable[..., Any]:
|
7
|
+
"""Throttle a function to ensure it is called no more than every n seconds.
|
8
|
+
|
9
|
+
Args:
|
10
|
+
seconds (float): Throttle time.
|
11
|
+
|
12
|
+
Returns:
|
13
|
+
Callable: Throttled function.
|
14
|
+
"""
|
15
|
+
|
7
16
|
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
8
17
|
last_called: float = 0
|
9
18
|
last_result: Any = None
|
inspect_ai/_util/trace.py
CHANGED
@@ -8,10 +8,10 @@ import shutil
|
|
8
8
|
import time
|
9
9
|
import traceback
|
10
10
|
from contextlib import contextmanager
|
11
|
-
from
|
12
|
-
from logging
|
11
|
+
from dataclasses import dataclass
|
12
|
+
from logging import FileHandler, Logger
|
13
13
|
from pathlib import Path
|
14
|
-
from typing import Any, Generator, Literal, TextIO
|
14
|
+
from typing import Any, Callable, Generator, Literal, TextIO
|
15
15
|
|
16
16
|
import jsonlines
|
17
17
|
from pydantic import BaseModel, Field, JsonValue
|
@@ -25,14 +25,33 @@ def inspect_trace_dir() -> Path:
|
|
25
25
|
return inspect_data_dir("traces")
|
26
26
|
|
27
27
|
|
28
|
+
def inspect_trace_file() -> Path:
|
29
|
+
return inspect_trace_dir() / f"trace-{os.getpid()}.log"
|
30
|
+
|
31
|
+
|
28
32
|
@contextmanager
|
29
33
|
def trace_action(
|
30
34
|
logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
|
31
35
|
) -> Generator[None, None, None]:
|
36
|
+
"""Trace a long running or potentially unreliable action.
|
37
|
+
|
38
|
+
Trace actions for which you want to collect data on the resolution
|
39
|
+
(e.g. succeeded, cancelled, failed, timed out, etc.) and duration of.
|
40
|
+
|
41
|
+
Traces are written to the `TRACE` log level (which is just below
|
42
|
+
`HTTP` and `INFO`). List and read trace logs with `inspect trace list`
|
43
|
+
and related commands (see `inspect trace --help` for details).
|
44
|
+
|
45
|
+
Args:
|
46
|
+
logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
|
47
|
+
action (str): Name of action to trace (e.g. 'Model', 'Subprocess', etc.)
|
48
|
+
message (str): Message describing action (can be a format string w/ args or kwargs)
|
49
|
+
*args (Any): Positional arguments for `message` format string.
|
50
|
+
**kwargs (Any): Named args for `message` format string.
|
51
|
+
"""
|
32
52
|
trace_id = uuid()
|
33
53
|
start_monotonic = time.monotonic()
|
34
54
|
start_wall = time.time()
|
35
|
-
pid = os.getpid()
|
36
55
|
detail = message % args if args else message % kwargs if kwargs else message
|
37
56
|
|
38
57
|
def trace_message(event: str) -> str:
|
@@ -47,7 +66,6 @@ def trace_action(
|
|
47
66
|
"event": "enter",
|
48
67
|
"trace_id": str(trace_id),
|
49
68
|
"start_time": start_wall,
|
50
|
-
"pid": pid,
|
51
69
|
},
|
52
70
|
)
|
53
71
|
|
@@ -63,7 +81,6 @@ def trace_action(
|
|
63
81
|
"event": "exit",
|
64
82
|
"trace_id": str(trace_id),
|
65
83
|
"duration": duration,
|
66
|
-
"pid": pid,
|
67
84
|
},
|
68
85
|
)
|
69
86
|
except (KeyboardInterrupt, asyncio.CancelledError):
|
@@ -77,7 +94,6 @@ def trace_action(
|
|
77
94
|
"event": "cancel",
|
78
95
|
"trace_id": str(trace_id),
|
79
96
|
"duration": duration,
|
80
|
-
"pid": pid,
|
81
97
|
},
|
82
98
|
)
|
83
99
|
raise
|
@@ -92,7 +108,6 @@ def trace_action(
|
|
92
108
|
"event": "timeout",
|
93
109
|
"trace_id": str(trace_id),
|
94
110
|
"duration": duration,
|
95
|
-
"pid": pid,
|
96
111
|
},
|
97
112
|
)
|
98
113
|
raise
|
@@ -110,7 +125,6 @@ def trace_action(
|
|
110
125
|
"error": getattr(ex, "message", str(ex)) or repr(ex),
|
111
126
|
"error_type": type(ex).__name__,
|
112
127
|
"stacktrace": traceback.format_exc(),
|
113
|
-
"pid": pid,
|
114
128
|
},
|
115
129
|
)
|
116
130
|
raise
|
@@ -119,6 +133,19 @@ def trace_action(
|
|
119
133
|
def trace_message(
|
120
134
|
logger: Logger, category: str, message: str, *args: Any, **kwargs: Any
|
121
135
|
) -> None:
|
136
|
+
"""Log a message using the TRACE log level.
|
137
|
+
|
138
|
+
The `TRACE` log level is just below `HTTP` and `INFO`. List and
|
139
|
+
read trace logs with `inspect trace list` and related commands
|
140
|
+
(see `inspect trace --help` for details).
|
141
|
+
|
142
|
+
Args:
|
143
|
+
logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
|
144
|
+
category (str): Category of trace message.
|
145
|
+
message (str): Trace message (can be a format string w/ args or kwargs)
|
146
|
+
*args (Any): Positional arguments for `message` format string.
|
147
|
+
**kwargs (Any): Named args for `message` format string.
|
148
|
+
"""
|
122
149
|
logger.log(TRACE, f"[{category}] {message}", *args, **kwargs)
|
123
150
|
|
124
151
|
|
@@ -153,7 +180,6 @@ class TraceFormatter(logging.Formatter):
|
|
153
180
|
"error",
|
154
181
|
"error_type",
|
155
182
|
"stacktrace",
|
156
|
-
"pid",
|
157
183
|
]:
|
158
184
|
if hasattr(record, key):
|
159
185
|
output[key] = getattr(record, key)
|
@@ -215,7 +241,22 @@ class ActionTraceRecord(TraceRecord):
|
|
215
241
|
error: str | None = Field(default=None)
|
216
242
|
error_type: str | None = Field(default=None)
|
217
243
|
stacktrace: str | None = Field(default=None)
|
218
|
-
|
244
|
+
|
245
|
+
|
246
|
+
@dataclass
|
247
|
+
class TraceFile:
|
248
|
+
file: Path
|
249
|
+
mtime: float
|
250
|
+
|
251
|
+
|
252
|
+
def list_trace_files() -> list[TraceFile]:
|
253
|
+
trace_files: list[TraceFile] = [
|
254
|
+
TraceFile(file=f, mtime=f.lstat().st_mtime)
|
255
|
+
for f in inspect_trace_dir().iterdir()
|
256
|
+
if f.is_file()
|
257
|
+
]
|
258
|
+
trace_files.sort(key=lambda f: f.mtime, reverse=True)
|
259
|
+
return trace_files
|
219
260
|
|
220
261
|
|
221
262
|
def read_trace_file(file: Path) -> list[TraceRecord]:
|
@@ -237,39 +278,30 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
|
|
237
278
|
return read_file(f)
|
238
279
|
|
239
280
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
Args:
|
269
|
-
source: The source file to be compressed
|
270
|
-
dest: The destination path for the compressed file
|
271
|
-
"""
|
272
|
-
with open(source, "rb") as f_in:
|
273
|
-
with gzip.open(dest, "wb") as f_out:
|
274
|
-
shutil.copyfileobj(f_in, f_out)
|
275
|
-
os.remove(source)
|
281
|
+
def rotate_trace_files() -> None:
|
282
|
+
# if multiple inspect processes start up at once they
|
283
|
+
# will all be attempting to rotate at the same time,
|
284
|
+
# which can lead to FileNotFoundError -- ignore these
|
285
|
+
# errors if they occur
|
286
|
+
try:
|
287
|
+
rotate_files = list_trace_files()[10:]
|
288
|
+
for file in rotate_files:
|
289
|
+
file.file.unlink(missing_ok=True)
|
290
|
+
except FileNotFoundError:
|
291
|
+
pass
|
292
|
+
|
293
|
+
|
294
|
+
def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:
|
295
|
+
def compress() -> None:
|
296
|
+
# ensure log is closed
|
297
|
+
log_handler.close()
|
298
|
+
|
299
|
+
# compress
|
300
|
+
trace_file = Path(log_handler.baseFilename)
|
301
|
+
if trace_file.exists():
|
302
|
+
with open(trace_file, "rb") as f_in:
|
303
|
+
with gzip.open(trace_file.with_suffix(".log.gz"), "wb") as f_out:
|
304
|
+
shutil.copyfileobj(f_in, f_out)
|
305
|
+
trace_file.unlink()
|
306
|
+
|
307
|
+
return compress
|
inspect_ai/_util/transcript.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import html
|
2
|
+
import re
|
2
3
|
from typing import Any
|
3
4
|
|
4
5
|
from rich.align import AlignMethod
|
@@ -19,13 +20,43 @@ def transcript_code_theme() -> str:
|
|
19
20
|
def transcript_markdown(content: str, *, escape: bool = False) -> Markdown:
|
20
21
|
code_theme = transcript_code_theme()
|
21
22
|
return Markdown(
|
22
|
-
|
23
|
+
html_escape_markdown(content) if escape else content,
|
23
24
|
code_theme=code_theme,
|
24
25
|
inline_code_lexer="python",
|
25
26
|
inline_code_theme=code_theme,
|
26
27
|
)
|
27
28
|
|
28
29
|
|
30
|
+
def html_escape_markdown(content: str) -> str:
|
31
|
+
"""Escape markdown lines that aren't in a code block."""
|
32
|
+
codeblock_pattern = re.compile("`{3,}")
|
33
|
+
current_codeblock = ""
|
34
|
+
escaped: list[str] = []
|
35
|
+
lines = content.splitlines()
|
36
|
+
for line in lines:
|
37
|
+
# look for matching end of codeblock
|
38
|
+
if current_codeblock:
|
39
|
+
if current_codeblock in line:
|
40
|
+
current_codeblock = ""
|
41
|
+
escaped.append(line)
|
42
|
+
continue
|
43
|
+
|
44
|
+
# look for beginning of codeblock
|
45
|
+
match = codeblock_pattern.search(line)
|
46
|
+
if match:
|
47
|
+
current_codeblock = match[0]
|
48
|
+
escaped.append(line)
|
49
|
+
continue
|
50
|
+
|
51
|
+
# escape if we are not in a codeblock
|
52
|
+
if current_codeblock:
|
53
|
+
escaped.append(line)
|
54
|
+
else:
|
55
|
+
escaped.append(html.escape(line, quote=False))
|
56
|
+
|
57
|
+
return "\n".join(escaped)
|
58
|
+
|
59
|
+
|
29
60
|
def set_transcript_markdown_options(markdown: Markdown) -> None:
|
30
61
|
code_theme = transcript_code_theme()
|
31
62
|
markdown.code_theme = code_theme
|
@@ -89,8 +120,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
|
|
89
120
|
return transcript_markdown("```python\n" + call + "\n```\n")
|
90
121
|
|
91
122
|
|
92
|
-
|
123
|
+
DOUBLE_LINE = Box(" ══ \n \n \n \n \n \n \n \n")
|
124
|
+
|
125
|
+
LINE = Box(" ── \n \n \n \n \n \n \n \n")
|
93
126
|
|
94
|
-
DOTTED = Box(" ·· \n
|
127
|
+
DOTTED = Box(" ·· \n \n \n \n \n \n \n \n")
|
95
128
|
|
96
|
-
NOBORDER = Box(" \n
|
129
|
+
NOBORDER = Box(" \n \n \n \n \n \n \n \n")
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
from pydantic import BaseModel, Field
|
6
|
+
from pydantic_core import to_json
|
7
|
+
from shortuuid import uuid
|
8
|
+
|
9
|
+
from .appdirs import inspect_data_dir
|
10
|
+
|
11
|
+
|
12
|
+
class VSCodeCommand(BaseModel):
|
13
|
+
command: str
|
14
|
+
args: list[Any] = Field(default_factory=list)
|
15
|
+
|
16
|
+
|
17
|
+
def execute_vscode_commands(commands: VSCodeCommand | list[VSCodeCommand]) -> None:
|
18
|
+
# resolve to list
|
19
|
+
commands = commands if isinstance(commands, list) else [commands]
|
20
|
+
|
21
|
+
# ensure there is someone listening
|
22
|
+
command_dir = vs_code_commands_dir()
|
23
|
+
if command_dir is None:
|
24
|
+
raise NotImplementedError(
|
25
|
+
"Not running in VS Code session or have older version of Inspect AI extension"
|
26
|
+
)
|
27
|
+
|
28
|
+
command_file = command_dir / uuid()
|
29
|
+
with open(command_file, "w") as f:
|
30
|
+
f.write(to_json(commands).decode())
|
31
|
+
|
32
|
+
|
33
|
+
def can_execute_vscode_commands() -> bool:
|
34
|
+
return vs_code_commands_dir() is not None
|
35
|
+
|
36
|
+
|
37
|
+
def vs_code_commands_dir() -> Path | None:
|
38
|
+
workspace_id = vscode_workspace_id()
|
39
|
+
if workspace_id:
|
40
|
+
workspace_dir = inspect_data_dir(os.path.join("vscode", workspace_id))
|
41
|
+
if workspace_dir.exists():
|
42
|
+
commands_dir = workspace_dir / "commands"
|
43
|
+
return commands_dir if commands_dir.exists() else None
|
44
|
+
else:
|
45
|
+
return None
|
46
|
+
else:
|
47
|
+
return None
|
48
|
+
|
49
|
+
|
50
|
+
def vscode_workspace_id() -> str | None:
|
51
|
+
return os.environ.get("INSPECT_WORKSPACE_ID", None)
|
inspect_ai/_view/notify.py
CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
4
4
|
from urllib.parse import urlparse
|
5
5
|
|
6
6
|
from inspect_ai._util.appdirs import inspect_data_dir
|
7
|
+
from inspect_ai._util.vscode import vscode_workspace_id
|
7
8
|
|
8
9
|
# lightweight tracking of when the last eval task completed
|
9
10
|
# this enables the view client to poll for changes frequently
|
@@ -24,7 +25,7 @@ def view_notify_eval(location: str) -> None:
|
|
24
25
|
payload = {
|
25
26
|
"location": location,
|
26
27
|
}
|
27
|
-
workspace_id =
|
28
|
+
workspace_id = vscode_workspace_id()
|
28
29
|
if workspace_id:
|
29
30
|
payload["workspace_id"] = workspace_id
|
30
31
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
// Do not remove this file even if the config is empty!
|
2
|
+
// VSCode's "Format Document" will respect this config and use the default
|
3
|
+
// settings, which is what we want. Without prettierrc, VSCode falls back to
|
4
|
+
// the user's settings, which could be different.
|
5
|
+
|
6
|
+
/**
|
7
|
+
* @see https://prettier.io/docs/en/configuration.html
|
8
|
+
* @type {import("prettier").Config}
|
9
|
+
*/
|
10
|
+
const config = {};
|
11
|
+
|
12
|
+
export default config;
|
inspect_ai/_view/www/App.css
CHANGED
@@ -235,6 +235,10 @@ body[class^="vscode-"] .sidebar .list-group {
|
|
235
235
|
--bs-list-group-active-color: var(--vscode-sideBarSectionHeader-foreground);
|
236
236
|
}
|
237
237
|
|
238
|
+
body[class^="vscode-"] div.ap-control-bar .ap-fullscreen-button {
|
239
|
+
display: none;
|
240
|
+
}
|
241
|
+
|
238
242
|
:root {
|
239
243
|
--bs-navbar-padding-y: 0;
|
240
244
|
--bs-navbar-brand-padding-y: 0;
|
@@ -690,7 +694,7 @@ table.table.table-sm td {
|
|
690
694
|
}
|
691
695
|
|
692
696
|
@keyframes moveLeftToRight {
|
693
|
-
|
697
|
+
from {
|
694
698
|
margin-left: 0;
|
695
699
|
}
|
696
700
|
to {
|
@@ -724,6 +728,23 @@ pre[class*="language-"].tool-output {
|
|
724
728
|
border-radius: var(--bs-border-radius) !important;
|
725
729
|
}
|
726
730
|
|
731
|
+
/* lightbox styles */
|
732
|
+
|
733
|
+
|
734
|
+
.lightbox-overlay .close-button,
|
735
|
+
.lightbox-overlay .nav-button {
|
736
|
+
/* Hide by default */
|
737
|
+
opacity: 0;
|
738
|
+
pointer-events: none; /* so it doesn't register clicks when hidden */
|
739
|
+
transition: opacity 0.3s ease;
|
740
|
+
}
|
741
|
+
|
742
|
+
.lightbox-overlay:hover .close-button,
|
743
|
+
.lightbox-overlay .nav-button {
|
744
|
+
/* Show on hover */
|
745
|
+
opacity: 1;
|
746
|
+
pointer-events: auto;
|
747
|
+
}
|
727
748
|
|
728
749
|
/* jsondiffpatch */
|
729
750
|
|