inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -2
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +78 -11
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +41 -7
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25344 -1849
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +4 -0
- inspect_ai/model/_conversation.py +20 -8
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +100 -44
- inspect_ai/model/_providers/azureai.py +20 -20
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/mistral.py +11 -11
- inspect_ai/model/_providers/openai.py +15 -16
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/score.py
CHANGED
inspect_ai/_eval/task/results.py
CHANGED
@@ -2,6 +2,7 @@ import fnmatch
|
|
2
2
|
import re
|
3
3
|
from collections import defaultdict
|
4
4
|
from copy import deepcopy
|
5
|
+
from dataclasses import dataclass, field
|
5
6
|
from typing import Any, Tuple, cast
|
6
7
|
|
7
8
|
from inspect_ai._util.registry import (
|
@@ -19,6 +20,8 @@ from inspect_ai.log import (
|
|
19
20
|
from inspect_ai.log._log import EvalSampleReductions
|
20
21
|
from inspect_ai.scorer import Metric, Score, Scorer
|
21
22
|
from inspect_ai.scorer._metric import SampleScore
|
23
|
+
from inspect_ai.scorer._metrics.accuracy import accuracy
|
24
|
+
from inspect_ai.scorer._metrics.std import stderr
|
22
25
|
from inspect_ai.scorer._reducer import ScoreReducer, mean_score, reducer_log_name
|
23
26
|
from inspect_ai.scorer._scorer import (
|
24
27
|
SCORER_METRICS,
|
@@ -27,6 +30,27 @@ from inspect_ai.scorer._scorer import (
|
|
27
30
|
)
|
28
31
|
|
29
32
|
|
33
|
+
@dataclass
class ScorerInfo:
    """Name, metrics, params and metadata describing one scorer.

    Instances are built either from a registered scorer object or from a
    bare score name (for scores that no scorer accounts for).
    """

    # scorer name
    name: str
    # metrics to compute: a list (optionally containing dict entries) or a dict
    metrics: list[Metric | dict[str, list[Metric]]] | dict[str, list[Metric]]
    # scorer parameters (empty when not provided)
    params: dict[str, Any] = field(default_factory=dict)
    # scorer metadata (empty when not provided)
    metadata: dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def from_scorer(scorer: Scorer) -> "ScorerInfo":
        """Build the info record for a registered scorer.

        The scorer's registry metadata is deep-copied and the
        ``SCORER_METRICS`` entry is removed from the copy, since the metrics
        are carried separately in ``metrics``.
        """
        scorer_name = registry_unqualified_name(scorer)
        metric_targets = scorer_metrics(scorer)

        # work on a copy so the registry's own metadata is left untouched
        extra_metadata = deepcopy(registry_info(scorer).metadata)
        del extra_metadata[SCORER_METRICS]

        return ScorerInfo(
            name=scorer_name,
            metrics=metric_targets,
            params=registry_params(scorer),
            metadata=extra_metadata,
        )

    @staticmethod
    def from_name(name: str) -> "ScorerInfo":
        """Build the info record for a bare score name.

        Uses ``accuracy()`` and ``stderr()`` as the metrics and leaves
        params and metadata empty.
        """
        default_metrics = [accuracy(), stderr()]
        return ScorerInfo(name=name, metrics=default_metrics)
|
52
|
+
|
53
|
+
|
30
54
|
def eval_results(
|
31
55
|
samples: int,
|
32
56
|
scores: list[dict[str, SampleScore]],
|
@@ -38,18 +62,23 @@ def eval_results(
|
|
38
62
|
results = EvalResults(total_samples=samples, completed_samples=len(scores))
|
39
63
|
reductions = None
|
40
64
|
|
65
|
+
# extract scorers info from scorers then create scorers info for any
|
66
|
+
# scores not already accounted for by a scorer name
|
67
|
+
scorers_info = [ScorerInfo.from_scorer(scorer) for scorer in (scorers or [])]
|
68
|
+
scorer_names = [info.name for info in scorers_info]
|
69
|
+
for name in set(key for sample_scores in scores for key in sample_scores):
|
70
|
+
if name not in scorer_names:
|
71
|
+
scorers_info.append(ScorerInfo.from_name(name))
|
72
|
+
scorer_names.append(name)
|
73
|
+
|
41
74
|
# record scorer
|
42
|
-
if
|
75
|
+
if len(scorers_info) > 0:
|
43
76
|
result_scores: list[EvalScore] = []
|
44
77
|
sample_reductions: list[EvalSampleReductions] = []
|
45
|
-
for
|
46
|
-
# extract non-metrics metadata
|
47
|
-
metadata = deepcopy(registry_info(scorer).metadata)
|
48
|
-
del metadata[SCORER_METRICS]
|
49
|
-
|
78
|
+
for scorer_info in scorers_info:
|
50
79
|
# this scorer
|
51
80
|
scorer_name = unique_scorer_name(
|
52
|
-
|
81
|
+
scorer_info.name, [eval_score.name for eval_score in result_scores]
|
53
82
|
)
|
54
83
|
|
55
84
|
# scores for this scorer
|
@@ -75,7 +104,7 @@ def eval_results(
|
|
75
104
|
|
76
105
|
# Compute metrics for this scorer
|
77
106
|
simple_scores = cast(list[Score], reduced_scores)
|
78
|
-
targets = metrics if metrics is not None else
|
107
|
+
targets = metrics if metrics is not None else scorer_info.metrics
|
79
108
|
if isinstance(targets, list):
|
80
109
|
## split the metrics into the simple metrics and any dictionary
|
81
110
|
## metrics, to be processed independently
|
@@ -88,8 +117,7 @@ def eval_results(
|
|
88
117
|
result_scores.extend(
|
89
118
|
scorer_for_metrics(
|
90
119
|
scorer_name=scorer_name,
|
91
|
-
|
92
|
-
metadata=metadata,
|
120
|
+
scorer_info=scorer_info,
|
93
121
|
scores=simple_scores,
|
94
122
|
metrics=simple_metrics,
|
95
123
|
reducer_name=reducer_display_nm,
|
@@ -99,8 +127,7 @@ def eval_results(
|
|
99
127
|
result_scores.extend(
|
100
128
|
scorers_from_metric_dict(
|
101
129
|
scorer_name=scorer_name,
|
102
|
-
|
103
|
-
metadata=metadata,
|
130
|
+
scorer_info=scorer_info,
|
104
131
|
scores=simple_scores,
|
105
132
|
metrics=dict_metric,
|
106
133
|
reducer_name=reducer_display_nm,
|
@@ -116,8 +143,7 @@ def eval_results(
|
|
116
143
|
result_scores.extend(
|
117
144
|
scorers_from_metric_dict(
|
118
145
|
scorer_name=scorer_name,
|
119
|
-
|
120
|
-
metadata=metadata,
|
146
|
+
scorer_info=scorer_info,
|
121
147
|
scores=simple_scores,
|
122
148
|
metrics=targets,
|
123
149
|
reducer_name=reducer_display_nm,
|
@@ -156,8 +182,7 @@ def split_metrics(
|
|
156
182
|
|
157
183
|
def scorer_for_metrics(
|
158
184
|
scorer_name: str,
|
159
|
-
|
160
|
-
metadata: dict[str, Any],
|
185
|
+
scorer_info: ScorerInfo,
|
161
186
|
scores: list[Score],
|
162
187
|
metrics: list[Metric],
|
163
188
|
reducer_name: str | None = None,
|
@@ -218,8 +243,10 @@ def scorer_for_metrics(
|
|
218
243
|
scorer=scorer_name,
|
219
244
|
reducer=reducer_name,
|
220
245
|
name=scorer_name,
|
221
|
-
params=
|
222
|
-
metadata=metadata
|
246
|
+
params=scorer_info.params,
|
247
|
+
metadata=scorer_info.metadata
|
248
|
+
if len(scorer_info.metadata.keys()) > 0
|
249
|
+
else None,
|
223
250
|
metrics=list_metrics,
|
224
251
|
)
|
225
252
|
)
|
@@ -228,8 +255,7 @@ def scorer_for_metrics(
|
|
228
255
|
|
229
256
|
def scorers_from_metric_dict(
|
230
257
|
scorer_name: str,
|
231
|
-
|
232
|
-
metadata: dict[str, Any],
|
258
|
+
scorer_info: ScorerInfo,
|
233
259
|
scores: list[Score],
|
234
260
|
metrics: dict[str, list[Metric]],
|
235
261
|
reducer_name: str | None = None,
|
@@ -299,8 +325,10 @@ def scorers_from_metric_dict(
|
|
299
325
|
scorer=scorer_name,
|
300
326
|
reducer=reducer_name,
|
301
327
|
name=metric_key,
|
302
|
-
params=
|
303
|
-
metadata=metadata
|
328
|
+
params=scorer_info.params,
|
329
|
+
metadata=scorer_info.metadata
|
330
|
+
if len(scorer_info.metadata.keys()) > 0
|
331
|
+
else None,
|
304
332
|
metrics=result_metrics,
|
305
333
|
)
|
306
334
|
)
|
inspect_ai/_eval/task/run.py
CHANGED
@@ -27,10 +27,7 @@ from inspect_ai._util.constants import (
|
|
27
27
|
from inspect_ai._util.datetime import iso_now
|
28
28
|
from inspect_ai._util.error import exception_message
|
29
29
|
from inspect_ai._util.hooks import send_telemetry
|
30
|
-
from inspect_ai._util.registry import
|
31
|
-
is_registry_object,
|
32
|
-
registry_log_name,
|
33
|
-
)
|
30
|
+
from inspect_ai._util.registry import is_registry_object, registry_log_name
|
34
31
|
from inspect_ai._util.timeouts import Timeout, timeout, timeout_at
|
35
32
|
from inspect_ai._view.notify import view_notify_eval
|
36
33
|
from inspect_ai.dataset import Dataset, Sample
|
@@ -45,7 +42,11 @@ from inspect_ai.log import (
|
|
45
42
|
from inspect_ai.log._condense import condense_sample
|
46
43
|
from inspect_ai.log._file import eval_log_json_str
|
47
44
|
from inspect_ai.log._log import EvalSampleLimit, EvalSampleReductions, eval_error
|
48
|
-
from inspect_ai.log._samples import
|
45
|
+
from inspect_ai.log._samples import (
|
46
|
+
active_sample,
|
47
|
+
set_active_sample_message_limit,
|
48
|
+
set_active_sample_token_limit,
|
49
|
+
)
|
49
50
|
from inspect_ai.log._transcript import (
|
50
51
|
ErrorEvent,
|
51
52
|
SampleInitEvent,
|
@@ -72,6 +73,7 @@ from inspect_ai.solver._chain import Chain, unroll
|
|
72
73
|
from inspect_ai.solver._fork import set_task_generate
|
73
74
|
from inspect_ai.solver._solver import Solver
|
74
75
|
from inspect_ai.solver._task_state import sample_state, set_sample_state, state_jsonable
|
76
|
+
from inspect_ai.util._limit import SampleLimitExceededError
|
75
77
|
from inspect_ai.util._sandbox.context import sandbox_connections
|
76
78
|
from inspect_ai.util._sandbox.environment import SandboxEnvironmentSpec
|
77
79
|
from inspect_ai.util._subtask import init_subtask
|
@@ -538,6 +540,9 @@ async def task_run_sample(
|
|
538
540
|
# helper to handle exceptions (will throw if we've exceeded the limit)
|
539
541
|
def handle_error(ex: BaseException) -> EvalError:
    """Record a sample failure and return its error description.

    Converts the exception with ``sample_error``, logs a warning naming the
    sample id and epoch, and appends an ``ErrorEvent`` to the transcript.

    Args:
        ex: The exception raised while running the sample.

    Returns:
        The ``EvalError`` produced by ``sample_error`` for ``ex``.
    """
    # NOTE(review): the comment above this helper says it throws once the
    # error limit is exceeded; presumably sample_error() does that - confirm.
    err = sample_error(ex)

    # surface the failure in the Python log as well as in the transcript
    # (message no longer ends with a stray, unbalanced ")")
    py_logger.warning(
        f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)}"
    )
    transcript()._event(ErrorEvent(error=err))
    return err
|
543
548
|
|
@@ -630,6 +635,20 @@ async def task_run_sample(
|
|
630
635
|
else:
|
631
636
|
raise
|
632
637
|
|
638
|
+
except SampleLimitExceededError as ex:
|
639
|
+
# sample limit event
|
640
|
+
transcript()._event(
|
641
|
+
SampleLimitEvent(
|
642
|
+
type=ex.type,
|
643
|
+
limit=ex.limit,
|
644
|
+
message=f"Sample completed: {ex.message}",
|
645
|
+
)
|
646
|
+
)
|
647
|
+
|
648
|
+
# capture most recent state for scoring
|
649
|
+
state = sample_state() or state
|
650
|
+
state.completed = True
|
651
|
+
|
633
652
|
except BaseException as ex:
|
634
653
|
error = handle_error(ex)
|
635
654
|
|
@@ -648,12 +667,16 @@ async def task_run_sample(
|
|
648
667
|
assert time_limit
|
649
668
|
timeout_cm = timeout(time_limit / 2)
|
650
669
|
|
670
|
+
# turn off sample limits
|
671
|
+
set_active_sample_token_limit(None)
|
672
|
+
set_active_sample_message_limit(None)
|
673
|
+
|
651
674
|
# scoring
|
652
675
|
try:
|
653
676
|
# timeout during scoring will result in an ordinary sample error
|
654
677
|
async with timeout_cm:
|
655
|
-
if
|
656
|
-
for scorer in scorers:
|
678
|
+
if error is None:
|
679
|
+
for scorer in scorers or []:
|
657
680
|
scorer_name = unique_scorer_name(
|
658
681
|
scorer, list(results.keys())
|
659
682
|
)
|
@@ -675,6 +698,16 @@ async def task_run_sample(
|
|
675
698
|
)
|
676
699
|
results[scorer_name] = sample_score
|
677
700
|
|
701
|
+
# add scores returned by solvers
|
702
|
+
if state.scores is not None:
|
703
|
+
for name, score in state.scores.items():
|
704
|
+
results[name] = SampleScore(
|
705
|
+
score=score, sample_id=state.sample_id
|
706
|
+
)
|
707
|
+
|
708
|
+
# propagate results into scores
|
709
|
+
state.scores = {k: v.score for k, v in results.items()}
|
710
|
+
|
678
711
|
except asyncio.CancelledError:
|
679
712
|
if active.interrupt_action:
|
680
713
|
transcript()._event(
|
@@ -819,6 +852,7 @@ async def resolve_dataset(
|
|
819
852
|
epoch=epoch,
|
820
853
|
model=model_name,
|
821
854
|
input=sample.input,
|
855
|
+
target=Target(sample.target),
|
822
856
|
choices=sample.choices,
|
823
857
|
messages=sample_messages(sample),
|
824
858
|
message_limit=message_limit,
|
inspect_ai/_eval/task/sandbox.py
CHANGED
@@ -4,11 +4,13 @@ import contextlib
|
|
4
4
|
from random import random
|
5
5
|
from typing import AsyncGenerator, Callable, NamedTuple, cast
|
6
6
|
|
7
|
+
import httpx
|
8
|
+
|
7
9
|
from inspect_ai._eval.task.task import Task
|
8
10
|
from inspect_ai._eval.task.util import task_run_dir
|
9
11
|
from inspect_ai._util.file import file, filesystem
|
10
12
|
from inspect_ai._util.registry import registry_unqualified_name
|
11
|
-
from inspect_ai._util.url import data_uri_to_base64, is_data_uri
|
13
|
+
from inspect_ai._util.url import data_uri_to_base64, is_data_uri, is_http_url
|
12
14
|
from inspect_ai.dataset import Sample
|
13
15
|
from inspect_ai.util._concurrency import concurrency
|
14
16
|
from inspect_ai.util._sandbox.context import (
|
@@ -65,12 +67,12 @@ async def sandboxenv_context(
|
|
65
67
|
files: dict[str, bytes] = {}
|
66
68
|
if sample.files:
|
67
69
|
for path, contents in sample.files.items():
|
68
|
-
files[path] = read_sandboxenv_file(contents)
|
70
|
+
files[path] = await read_sandboxenv_file(contents)
|
69
71
|
|
70
72
|
# read setup script from sample (add bash shebang if necessary)
|
71
73
|
setup: bytes | None = None
|
72
74
|
if sample.setup:
|
73
|
-
setup = read_sandboxenv_file(sample.setup)
|
75
|
+
setup = await read_sandboxenv_file(sample.setup)
|
74
76
|
setup_str = setup.decode(encoding="utf-8")
|
75
77
|
if not setup_str.strip().startswith("#!"):
|
76
78
|
setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
|
@@ -108,13 +110,16 @@ async def sandboxenv_context(
|
|
108
110
|
)
|
109
111
|
|
110
112
|
|
111
|
-
def read_sandboxenv_file(contents: str) -> bytes:
|
113
|
+
async def read_sandboxenv_file(contents: str) -> bytes:
|
112
114
|
if is_data_uri(contents):
|
113
115
|
contents_base64 = data_uri_to_base64(contents)
|
114
116
|
file_bytes = base64.b64decode(contents_base64)
|
117
|
+
elif is_http_url(contents):
|
118
|
+
client = httpx.AsyncClient()
|
119
|
+
file_bytes = (await client.get(contents, follow_redirects=True)).content
|
115
120
|
else:
|
116
121
|
# try to read as a file (if it doesn't exist or has a path not cool w/
|
117
|
-
# the
|
122
|
+
# the filesystem then we fall back to contents)
|
118
123
|
try:
|
119
124
|
fs = filesystem(contents)
|
120
125
|
if fs.exists(contents):
|
inspect_ai/_util/constants.py
CHANGED
inspect_ai/_util/port_names.py
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
from typing import Literal
|
2
|
+
|
3
|
+
|
4
|
+
def get_service_by_port(port: int, protocol: Literal["tcp", "udp"]) -> str | None:
|
5
|
+
"""
|
6
|
+
Returns the likely service running on a given port number.
|
7
|
+
|
8
|
+
Args:
|
9
|
+
port (int): The port number to look up
|
10
|
+
protocol (str): Either 'tcp' or 'udp'
|
11
|
+
|
12
|
+
Returns:
|
13
|
+
str: Description of the likely service, or None if not found
|
14
|
+
"""
|
15
|
+
# Common port mappings based on IANA assignments and common usage
|
16
|
+
port_mappings = {
|
17
|
+
"tcp": {
|
18
|
+
20: "FTP (Data)",
|
19
|
+
21: "FTP (Control)",
|
20
|
+
22: "SSH",
|
21
|
+
23: "Telnet",
|
22
|
+
25: "SMTP",
|
23
|
+
53: "DNS",
|
24
|
+
80: "HTTP",
|
25
|
+
110: "POP3",
|
26
|
+
143: "IMAP",
|
27
|
+
443: "HTTPS",
|
28
|
+
445: "Microsoft-DS (SMB)",
|
29
|
+
587: "SMTP (Submission)",
|
30
|
+
993: "IMAPS",
|
31
|
+
995: "POP3S",
|
32
|
+
1433: "Microsoft SQL Server",
|
33
|
+
1521: "Oracle Database",
|
34
|
+
3306: "MySQL",
|
35
|
+
3389: "RDP (Remote Desktop)",
|
36
|
+
5432: "PostgreSQL",
|
37
|
+
5900: "VNC",
|
38
|
+
5901: "VNC Display :1",
|
39
|
+
5902: "VNC Display :2",
|
40
|
+
6080: "noVNC",
|
41
|
+
8080: "HTTP Alternate",
|
42
|
+
8443: "HTTPS Alternate",
|
43
|
+
27017: "MongoDB",
|
44
|
+
27018: "MongoDB Shard",
|
45
|
+
27019: "MongoDB Config Server",
|
46
|
+
},
|
47
|
+
"udp": {
|
48
|
+
53: "DNS",
|
49
|
+
67: "DHCP Server",
|
50
|
+
68: "DHCP Client",
|
51
|
+
69: "TFTP",
|
52
|
+
123: "NTP",
|
53
|
+
161: "SNMP",
|
54
|
+
162: "SNMP Trap",
|
55
|
+
514: "Syslog",
|
56
|
+
1194: "OpenVPN",
|
57
|
+
5353: "mDNS",
|
58
|
+
},
|
59
|
+
}
|
60
|
+
|
61
|
+
return port_mappings.get(protocol, {}).get(port, None)
|
inspect_ai/_util/text.py
CHANGED
@@ -108,3 +108,26 @@ def str_to_float(s: str) -> float:
|
|
108
108
|
exponent = 1 # Default exponent is 1 if no superscript is present
|
109
109
|
|
110
110
|
return base**exponent
|
111
|
+
|
112
|
+
|
113
|
+
def truncate(text: str, length: int, overflow: str = "...", pad: bool = True) -> str:
    """
    Truncate text to specified length with optional padding and overflow indicator.

    Args:
        text (str): Text to truncate
        length (int): Maximum length including overflow indicator
        overflow (str): String to indicate truncation (defaults to '...')
        pad (bool): Whether to pad the result to full length (defaults to padding)

    Returns:
        Truncated string, padded if requested. A truncated result is never
        longer than `length`: if the overflow indicator alone does not fit,
        the indicator itself is cut down to `length` characters.

    """
    if len(text) <= length:
        # short enough: optionally right-pad with spaces up to `length`
        return text.ljust(length) if pad else text

    # a negative budget would make the slice below trim from the end of the
    # text and return more than `length` characters, so clamp at zero
    limit = max(length, 0)
    if len(overflow) >= limit:
        # no room for any text: return as much of the indicator as fits
        return overflow[:limit]

    # result is exactly `limit` characters long, so no padding is needed
    return text[: limit - len(overflow)] + overflow
|
inspect_ai/_view/www/App.css
CHANGED
@@ -9,6 +9,12 @@
|
|
9
9
|
--inspect-input-border: var(--bs-light-border-subtle);
|
10
10
|
--inspect-diff-add-color: #dafbe1;
|
11
11
|
--inspect-diff-remove-color: #ffebe9;
|
12
|
+
--inspect-inactive-selection-background: var(--vscode-editor-inactiveSelectionBackground, #d9d9d9);
|
13
|
+
--inspect-active-selection-background: var(--vscode-editor-selectionBackground, #d7d4f0);
|
14
|
+
--inspect-focus-border-color: #86b7fe;
|
15
|
+
--inspect-focus-border-shadow: 0 0 0 0.25rem rgba(var(--bs-primary-rgb), 0.25);
|
16
|
+
--inspect-focus-border-gray-color: #808080;
|
17
|
+
--inspect-focus-border-gray-shadow: 0 0 0 0.25rem rgba(48, 48, 48, 0.25);
|
12
18
|
}
|
13
19
|
|
14
20
|
body:not([class^="vscode-"]) button {
|
@@ -650,6 +656,30 @@ table.table.table-sm td {
|
|
650
656
|
height: auto !important;
|
651
657
|
}
|
652
658
|
|
659
|
+
[data-tooltip] {
|
660
|
+
position: relative;
|
661
|
+
}
|
662
|
+
[data-tooltip]:hover::after {
|
663
|
+
content: attr(data-tooltip);
|
664
|
+
position: absolute;
|
665
|
+
line-height: 1.25;
|
666
|
+
background: var(--bs-light);
|
667
|
+
color: var(--bs-body-color);
|
668
|
+
opacity: 1;
|
669
|
+
padding: 4px 8px;
|
670
|
+
border-radius: 4px;
|
671
|
+
border: 1px solid var(--bs-border-color);
|
672
|
+
box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.25);
|
673
|
+
white-space: pre-wrap;
|
674
|
+
width: max-content;
|
675
|
+
max-width: 400px;
|
676
|
+
z-index: 1000;
|
677
|
+
}
|
678
|
+
[data-tooltip][data-tooltip-position="bottom-left"]:hover::after {
|
679
|
+
right: 0%;
|
680
|
+
top: 100%;
|
681
|
+
}
|
682
|
+
|
653
683
|
/* ANSI Coloring */
|
654
684
|
.ansi-display {
|
655
685
|
font-family: monospace;
|
@@ -725,7 +755,7 @@ pre[class*="language-"].tool-output {
|
|
725
755
|
background: none !important;
|
726
756
|
border: none !important;
|
727
757
|
box-shadow: none !important;
|
728
|
-
border-radius: var(--bs-border-radius) !important;
|
758
|
+
border-radius: var(--bs-border-radius) !important;
|
729
759
|
}
|
730
760
|
|
731
761
|
/* lightbox styles */
|
inspect_ai/_view/www/dist/assets/index.css
CHANGED
@@ -14282,6 +14282,12 @@ pre[class*="language-"] {
|
|
14282
14282
|
--inspect-input-border: var(--bs-light-border-subtle);
|
14283
14283
|
--inspect-diff-add-color: #dafbe1;
|
14284
14284
|
--inspect-diff-remove-color: #ffebe9;
|
14285
|
+
--inspect-inactive-selection-background: var(--vscode-editor-inactiveSelectionBackground, #d9d9d9);
|
14286
|
+
--inspect-active-selection-background: var(--vscode-editor-selectionBackground, #d7d4f0);
|
14287
|
+
--inspect-focus-border-color: #86b7fe;
|
14288
|
+
--inspect-focus-border-shadow: 0 0 0 0.25rem rgba(var(--bs-primary-rgb), 0.25);
|
14289
|
+
--inspect-focus-border-gray-color: #808080;
|
14290
|
+
--inspect-focus-border-gray-shadow: 0 0 0 0.25rem rgba(48, 48, 48, 0.25);
|
14285
14291
|
}
|
14286
14292
|
|
14287
14293
|
body:not([class^="vscode-"]) button {
|
@@ -14923,6 +14929,30 @@ table.table.table-sm td {
|
|
14923
14929
|
height: auto !important;
|
14924
14930
|
}
|
14925
14931
|
|
14932
|
+
[data-tooltip] {
|
14933
|
+
position: relative;
|
14934
|
+
}
|
14935
|
+
[data-tooltip]:hover::after {
|
14936
|
+
content: attr(data-tooltip);
|
14937
|
+
position: absolute;
|
14938
|
+
line-height: 1.25;
|
14939
|
+
background: var(--bs-light);
|
14940
|
+
color: var(--bs-body-color);
|
14941
|
+
opacity: 1;
|
14942
|
+
padding: 4px 8px;
|
14943
|
+
border-radius: 4px;
|
14944
|
+
border: 1px solid var(--bs-border-color);
|
14945
|
+
box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.25);
|
14946
|
+
white-space: pre-wrap;
|
14947
|
+
width: max-content;
|
14948
|
+
max-width: 400px;
|
14949
|
+
z-index: 1000;
|
14950
|
+
}
|
14951
|
+
[data-tooltip][data-tooltip-position="bottom-left"]:hover::after {
|
14952
|
+
right: 0%;
|
14953
|
+
top: 100%;
|
14954
|
+
}
|
14955
|
+
|
14926
14956
|
/* ANSI Coloring */
|
14927
14957
|
.ansi-display {
|
14928
14958
|
font-family: monospace;
|
@@ -14998,7 +15028,7 @@ pre[class*="language-"].tool-output {
|
|
14998
15028
|
background: none !important;
|
14999
15029
|
border: none !important;
|
15000
15030
|
box-shadow: none !important;
|
15001
|
-
border-radius: var(--bs-border-radius) !important;
|
15031
|
+
border-radius: var(--bs-border-radius) !important;
|
15002
15032
|
}
|
15003
15033
|
|
15004
15034
|
/* lightbox styles */
|