inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/common.py
CHANGED
@@ -2,6 +2,7 @@ import functools
 from typing import Any, Callable, Literal, cast
 
 import click
+import rich
 from typing_extensions import TypedDict
 
 from inspect_ai._util.constants import (
@@ -105,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
 def process_common_options(options: CommonOptions) -> None:
     # propagate display
     if options["no_ansi"]:
-        display = "plain"
+        display = "rich"
+        rich.reconfigure(no_color=True)
     else:
         display = options["display"].lower().strip()
     init_display_type(display)
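Note: `--no-ansi` previously selected plain output; it now keeps the rich display and strips color globally. A minimal standalone sketch of the rich call this relies on (`rich.reconfigure` rebuilds the module-level console, so later `rich.print()` output omits color codes):

import rich

# After reconfigure, the global console used by rich.print() is
# recreated with no_color=True, so markup renders without color.
rich.reconfigure(no_color=True)
rich.print("[red]this prints without red[/red]")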
inspect_ai/_cli/eval.py
CHANGED
@@ -314,12 +314,6 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     help="Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
     envvar="INSPECT_EVAL_STOP_SEQS",
 )
-@click.option(
-    "--suffix",
-    type=str,
-    help="The suffix that comes after a completion of inserted text. OpenAI only.",
-    envvar="INSPECT_EVAL_SUFFIX",
-)
 @click.option(
     "--temperature",
     type=float,
@@ -348,13 +342,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     "--logprobs",
     type=bool,
     is_flag=True,
-    help="Return log probabilities of the output tokens. OpenAI, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
+    help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
     envvar="INSPECT_EVAL_LOGPROBS",
 )
 @click.option(
     "--top-logprobs",
     type=int,
-    help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, TogetherAI, Huggingface, and vLLM only.",
+    help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
     envvar="INSPECT_EVAL_TOP_LOGPROBS",
 )
 @click.option(
@@ -365,6 +359,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
     envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
 )
+@click.option(
+    "--internal-tools/--no-internal-tools",
+    type=bool,
+    is_flag=True,
+    default=True,
+    help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
+    envvar="INSPECT_EVAL_INTERNAL_TOOLS",
+)
 @click.option(
     "--max-tool-output",
     type=int,
@@ -431,7 +433,6 @@ def eval_command(
     logit_bias: str | None,
     seed: int | None,
     stop_seqs: str | None,
-    suffix: str | None,
     temperature: float | None,
     top_p: float | None,
     top_k: int | None,
@@ -439,6 +440,7 @@ def eval_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -598,6 +600,7 @@ def eval_set_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -836,6 +839,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
         if key == "parallel_tool_calls":
             if value is not False:
                 value = None
+        if key == "internal_tools":
+            if value is not False:
+                value = None
         config[key] = value  # type: ignore
     return config
 
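The new `--internal-tools` flag defaults to on and flows through `config_from_locals()` into the generate config; mirroring `parallel_tool_calls`, only an explicit `--no-internal-tools` is recorded. A hedged sketch of the equivalent Python call, assuming `eval()` forwards `GenerateConfigArgs` keyword arguments the way the CLI wiring suggests:

from inspect_ai import eval

# internal_tools=False disables mapping tools to model-internal
# implementations (e.g. Anthropic's native computer tool); this kwarg
# is assumed to be forwarded into the generate config like the CLI option.
eval("my_task.py", model="anthropic/claude-3-5-sonnet-latest", internal_tools=False)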
inspect_ai/_display/core/active.py
CHANGED
@@ -5,6 +5,7 @@ import rich
 
 from inspect_ai.util._display import display_type
 
+from ..plain.display import PlainDisplay
 from ..rich.display import RichDisplay
 from ..textual.display import TextualDisplay
 from .display import Display, TaskScreen
@@ -13,7 +14,9 @@ from .display import Display, TaskScreen
 def display() -> Display:
     global _active_display
     if _active_display is None:
-        if (
+        if display_type() == "plain":
+            _active_display = PlainDisplay()
+        elif (
             display_type() == "full"
             and sys.stdout.isatty()
             and not rich.get_console().is_jupyter
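`display()` now returns the new `PlainDisplay` before any of the rich/textual checks run whenever the configured display type is "plain". A sketch of driving that branch directly (assumes `init_display_type`, which the CLI calls, lives alongside `display_type` in `inspect_ai.util._display`):

from inspect_ai._display.core.active import display
from inspect_ai.util._display import init_display_type

# With "plain" configured, display() takes the new first branch and
# returns a PlainDisplay rather than probing for a TTY or Jupyter.
init_display_type("plain")
d = display()
d.print("plain, append-only output")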
inspect_ai/_display/core/config.py
CHANGED
@@ -13,14 +13,14 @@ def task_config(
         value = task_args[key]
         if is_registry_dict(value):
             task_args[key] = value["name"]
-    config = profile.eval_config.model_dump(exclude_none=True) | task_args
+    config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
     if generate_config:
-        config = profile.generate_config.model_dump(exclude_none=True) | config
+        config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
     if profile.tags:
         config["tags"] = ",".join(profile.tags)
     config_print: list[str] = []
     for name, value in config.items():
-        if name == "approval":
+        if name == "approval" and isinstance(value, dict):
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
inspect_ai/_display/core/panel.py
CHANGED
@@ -50,9 +50,13 @@ def task_panel(
     table.add_row(subtitle_table)
 
     # main progress and task info
-    table.add_row()
-    table.add_row(body)
-    table.add_row()
+    if body:
+        table.add_row()
+        table.add_row(body)
+
+    # spacing if there is more ocontent
+    if footer or log_location:
+        table.add_row()
 
     # footer if specified
     if footer:
inspect_ai/_display/plain/__init__.py
ADDED
File without changes
inspect_ai/_display/plain/display.py
ADDED
@@ -0,0 +1,203 @@
+import asyncio
+import contextlib
+from typing import Any, AsyncIterator, Coroutine, Iterator
+
+import rich
+
+from inspect_ai._display.core.rich import rich_initialise
+from inspect_ai._util.text import truncate
+from inspect_ai._util.throttle import throttle
+
+from ...util._concurrency import concurrency_status
+from ..core.config import task_config
+from ..core.display import (
+    TR,
+    Display,
+    Progress,
+    TaskDisplay,
+    TaskDisplayMetric,
+    TaskProfile,
+    TaskResult,
+    TaskScreen,
+    TaskSpec,
+    TaskWithResult,
+)
+from ..core.footer import task_http_rate_limits
+from ..core.panel import task_panel, task_targets
+from ..core.results import task_metric, tasks_results
+
+
+class PlainDisplay(Display):
+    def __init__(self) -> None:
+        self.total_tasks: int = 0
+        self.tasks: list[TaskWithResult] = []
+        self.parallel = False
+        rich_initialise()
+
+    def print(self, message: str) -> None:
+        print(message)
+
+    @contextlib.contextmanager
+    def progress(self, total: int) -> Iterator[Progress]:
+        yield PlainProgress(total)
+
+    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
+        return asyncio.run(main)
+
+    @contextlib.contextmanager
+    def suspend_task_app(self) -> Iterator[None]:
+        yield
+
+    @contextlib.asynccontextmanager
+    async def task_screen(
+        self, tasks: list[TaskSpec], parallel: bool
+    ) -> AsyncIterator[TaskScreen]:
+        self.total_tasks = len(tasks)
+        self.multiple_task_names = len({task.name for task in tasks}) > 1
+        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
+        self.tasks = []
+        self.parallel = parallel
+        try:
+            # Print header for task(s)
+            if parallel:
+                print(f"Running {self.total_tasks} tasks...")
+            yield TaskScreen()
+        finally:
+            # Print final results
+            if self.tasks:
+                self._print_results()
+
+    @contextlib.contextmanager
+    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
+        # Print initial task information using a rich panel
+        panel = task_panel(
+            profile=profile,
+            show_model=True,
+            body="",  # Empty body since we haven't started yet
+            subtitle=(task_config(profile), task_targets(profile)),
+            footer=None,
+            log_location=None,
+        )
+        rich.print(panel)
+
+        # Create and yield task display
+        task = TaskWithResult(profile, None)
+        self.tasks.append(task)
+        yield PlainTaskDisplay(
+            task,
+            show_task_names=self.multiple_task_names,
+            show_model_names=self.multiple_model_names,
+        )
+
+    def _print_results(self) -> None:
+        """Print final results using rich panels"""
+        panels = tasks_results(self.tasks)
+        rich.print(panels)
+
+
+class PlainProgress(Progress):
+    def __init__(self, total: int):
+        self.total = total
+        self.current = 0
+
+    def update(self, n: int = 1) -> None:
+        self.current += n
+        # No direct printing - PlainTaskDisplay handles it
+
+    def complete(self) -> None:
+        self.current = self.total
+
+
+class PlainTaskDisplay(TaskDisplay):
+    def __init__(
+        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
+    ):
+        self.task = task
+        self.show_task_names = show_task_names
+        self.show_model_names = show_model_names
+        self.progress_display: PlainProgress | None = None
+        self.samples_complete = 0
+        self.samples_total = 0
+        self.current_metrics: list[TaskDisplayMetric] | None = None
+        self.last_progress = 0  # Track last progress percentage
+
+    @contextlib.contextmanager
+    def progress(self) -> Iterator[Progress]:
+        self.progress_display = PlainProgress(self.task.profile.steps)
+        yield self.progress_display
+
+    @throttle(1)
+    def _print_status_throttled(self) -> None:
+        self._print_status()
+
+    def _print_status(self) -> None:
+        """Print status updates on new lines when there's meaningful progress"""
+        if not self.progress_display:
+            return
+
+        # Calculate current progress percentage
+        current_progress = int(
+            self.progress_display.current / self.progress_display.total * 100
+        )
+
+        # Only print on percentage changes to avoid too much output
+        if current_progress != self.last_progress:
+            status_parts: list[str] = []
+
+            # if this is parallel print task and model to distinguish (limit both to 12 chars)
+            MAX_NAME_WIDTH = 12
+            if self.show_task_names:
+                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
+            if self.show_model_names:
+                status_parts.append(
+                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
+                )
+
+            # Add step progress
+            status_parts.append(
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+            )
+
+            # Add sample progress
+            status_parts.append(
+                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
+            )
+
+            # Add metrics
+            if self.current_metrics:
+                metric_str = task_metric(self.current_metrics)
+                status_parts.append(metric_str)
+
+            # Add resource usage
+            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
+            # the rich formatting added in the ``task_dict`` call
+            resources_dict: dict[str, str] = {}
+            for model, resource in concurrency_status().items():
+                resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
+            resources = ", ".join(
+                [f"{key}: {value}" for key, value in resources_dict.items()]
+            )
+            status_parts.append(resources)
+
+            # Add rate limits
+            rate_limits = task_http_rate_limits()
+            if rate_limits:
+                status_parts.append(rate_limits)
+
+            # Print on new line
+            print(" | ".join(status_parts))
+
+            self.last_progress = current_progress
+
+    def sample_complete(self, complete: int, total: int) -> None:
+        self.samples_complete = complete
+        self.samples_total = total
+        self._print_status_throttled()
+
+    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
+        self.current_metrics = metrics
+        self._print_status_throttled()
+
+    def complete(self, result: TaskResult) -> None:
+        self.task.result = result
+        self._print_status()
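`PlainTaskDisplay` gates output on integer-percent changes and additionally throttles sample/metric updates to once per second via `@throttle(1)`. A self-contained sketch of the percent-gating pattern (an illustration, not the class itself):

# Print a status line only when the integer percent changes; with 200
# steps this emits at most ~100 lines instead of 200.
def make_status_printer(total: int):
    state = {"current": 0, "last_pct": -1}

    def update(n: int = 1) -> None:
        state["current"] += n
        pct = int(state["current"] / total * 100)
        if pct != state["last_pct"]:
            print(f"Steps: {state['current']:3d}/{total} {pct:3d}%")
            state["last_pct"] = pct

    return update

update = make_status_printer(200)
for _ in range(200):
    update()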
inspect_ai/_display/rich/display.py
CHANGED
@@ -129,11 +129,6 @@ class RichDisplay(Display):
     @override
     @contextlib.contextmanager
     def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
-        # if there is no ansi display than all of the below will
-        # be a no-op, so we print a simple text message for the task
-        if display_type() == "plain":
-            rich.get_console().print(task_no_ansi(profile))
-
         # for typechekcer
         if self.tasks is None:
             self.tasks = []
inspect_ai/_display/textual/widgets/port_mappings.py
ADDED
@@ -0,0 +1,110 @@
+from typing import Literal
+
+from textual.app import ComposeResult
+from textual.containers import HorizontalScroll
+from textual.widget import Widget
+from textual.widgets import Link, Static
+
+from inspect_ai._util.port_names import get_service_by_port
+from inspect_ai.util._sandbox.environment import PortMapping
+
+
+class PortMappingsView(HorizontalScroll):
+    DEFAULT_CSS = """
+    PortMappingsView {
+        layout: grid;
+        height: auto;
+        grid-size: 4 3;
+        grid-columns: auto auto auto auto;
+        grid-gutter: 0 1;
+    }
+    """
+
+    def __init__(self, ports: list[PortMapping] | None) -> None:
+        super().__init__()
+        self.ports = ports
+
+    def compose(self) -> ComposeResult:
+        if not self.ports:
+            return
+        yield Static("service")
+        yield Static("sandbox")
+        yield Static("client")
+        yield Static("endpoint")
+        mappings_and_services = [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in self.ports
+        ]
+        remaining_widgets = [
+            widget
+            for mapping_and_service in mappings_and_services
+            for widget in widgets_from_port_mapping(mapping_and_service)
+        ]
+        for widget in remaining_widgets:
+            yield widget
+
+
+def widgets_for_port_mappings(
+    port_mappings: list[PortMapping] | None,
+) -> list[Widget]:
+    if port_mappings is None:
+        return []
+    return [
+        static
+        for mapping in [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in port_mappings
+        ]
+        for static in widgets_from_port_mapping(mapping)
+    ]
+
+
+def widgets_from_port_mapping(
+    mapping_service_tuple: tuple[PortMapping, str | None],
+) -> list[Widget]:
+    port_mapping, service = mapping_service_tuple
+    return [
+        widget
+        for host_mapping in port_mapping.mappings
+        for widget in get_row_widgets(
+            port_mapping.protocol,
+            host_mapping.host_port,
+            port_mapping.container_port,
+            service,
+        )
+    ]
+
+
+def get_row_widgets(
+    protocol: Literal["tcp", "udp"],
+    host_port: int,
+    container_port: int,
+    service: str | None,
+) -> list[Widget]:
+    url = get_url(
+        host_port,
+        service,
+    )
+    return [
+        Static(service if service is not None else protocol),
+        Static(str(container_port)),
+        Static(str(host_port)),
+        Link(url) if url is not None else Static("asdf"),
+    ]
+
+
+def get_url(
+    host_port: int,
+    service: str | None,
+) -> str | None:
+    if service is not None:
+        if service == "noVNC":
+            return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
+
+        if service.startswith("HTTP"):
+            return f"https://localhost:{host_port}"
+
+        if service.startswith("VNC"):
+            return f"vnc://localhost:{host_port}"
+
+    return None
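`get_url()` maps a detected service name to a clickable endpoint (note that the "HTTP" branch returns an https:// URL as written). Its behavior follows directly from the code above:

from inspect_ai._display.textual.widgets.port_mappings import get_url

assert get_url(5900, "VNC") == "vnc://localhost:5900"
assert get_url(8080, "HTTP") == "https://localhost:8080"
assert get_url(6080, "noVNC") == (
    "http://localhost:6080?view_only=true&autoconnect=true&resize=scale"
)
assert get_url(1234, None) is None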
inspect_ai/_display/textual/widgets/samples.py
CHANGED
@@ -5,29 +5,28 @@ from rich.console import RenderableType
 from rich.table import Table
 from rich.text import Text
 from textual.app import ComposeResult
-from textual.containers import (
-    Horizontal,
-    HorizontalGroup,
-    Vertical,
-    VerticalGroup,
-)
+from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalGroup
 from textual.reactive import reactive
 from textual.widget import Widget
 from textual.widgets import (
     Button,
     Collapsible,
+    Link,
     LoadingIndicator,
     OptionList,
     Static,
 )
 from textual.widgets.option_list import Option, Separator
 
+from inspect_ai._display.textual.widgets.port_mappings import get_url
 from inspect_ai._util.format import format_progress_time
+from inspect_ai._util.port_names import get_service_by_port
 from inspect_ai._util.registry import registry_unqualified_name
 from inspect_ai.log._samples import ActiveSample
 from inspect_ai.log._transcript import ToolEvent
 
 from .clock import Clock
+from .sandbox import SandboxView
 from .transcript import TranscriptView
 
 
@@ -74,6 +73,7 @@ class SamplesView(Widget):
 
     async def set_highlighted_sample(self, highlighted: int | None) -> None:
         sample_info = self.query_one(SampleInfo)
+        sample_vnc = self.query_one(SampleVNC)
         transcript_view = self.query_one(TranscriptView)
         sample_toolbar = self.query_one(SampleToolbar)
         if highlighted is not None:
@@ -83,12 +83,14 @@ class SamplesView(Widget):
             transcript_view.display = True
             sample_toolbar.display = True
             await sample_info.sync_sample(sample)
+            await sample_vnc.sync_sample(sample)
             await transcript_view.sync_sample(sample)
             await sample_toolbar.sync_sample(sample)
             return
 
         # otherwise hide ui
         sample_info.display = False
+        sample_vnc.display = False
         transcript_view.display = False
         sample_toolbar.display = False
 
@@ -182,10 +184,59 @@ class SamplesList(OptionList):
         return None
 
 
-class SampleInfo(Horizontal):
+class SampleVNC(Horizontal):
+    DEFAULT_CSS = """
+    SampleVNC {
+        layout: grid;
+        grid-size: 2 1;
+        grid-columns: auto 1fr;
+    }
+    SampleVNC Static {
+        color: $secondary;
+    }
+    SampleVNC Link {
+        color: $accent;
+    }
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._sample: ActiveSample | None = None
+
+    def compose(self) -> ComposeResult:
+        yield Static("VNC: ")
+        yield Link("")
+
+    async def sync_sample(self, sample: ActiveSample) -> None:
+        if sample == self._sample:
+            return
+
+        # defult to hidden (show if we find a vnc connection)
+        self.display = False
+
+        # is there a vnc connection? if so populate
+        for connection in [c for c in sample.sandboxes.values() if c.ports]:
+            for port in connection.ports or []:
+                service = get_service_by_port(port.container_port, port.protocol)
+                if service == "noVNC" and port.mappings:
+                    host_mappings = port.mappings
+                    link = self.query_one(Link)
+                    vnc_url = get_url(host_mappings[0].host_port, service)
+                    if vnc_url:
+                        link.text = vnc_url
+                        link.url = link.text
+                        self.display = True
+                    break
+
+
+class SampleInfo(Vertical):
     DEFAULT_CSS = """
     SampleInfo {
         color: $text-muted;
+        layout: grid;
+        grid-size: 1 2;
+        grid-rows: auto 1;
+        grid-gutter: 1;
     }
     SampleInfo Collapsible {
         padding: 0;
@@ -218,11 +269,13 @@ class SampleInfo(Horizontal):
     def __init__(self) -> None:
         super().__init__()
         self._sample: ActiveSample | None = None
+        self._sandbox_count: int | None = None
 
     def compose(self) -> ComposeResult:
         with Collapsible(title=""):
             yield SampleLimits()
             yield SandboxesView()
+        yield SampleVNC()
 
     async def sync_sample(self, sample: ActiveSample | None) -> None:
         if sample is None:
@@ -233,12 +286,14 @@ class SampleInfo(Horizontal):
         limits = self.query_one(SampleLimits)
         await limits.sync_sample(sample)
 
+        new_sandbox_count = len(sample.sandboxes)
         # bail if we've already processed this sample
-        if self._sample == sample:
+        if self._sample == sample and self._sandbox_count == new_sandbox_count:
            return
 
         # set sample
         self._sample = sample
+        self._sandbox_count = new_sandbox_count
 
         # update UI
         self.display = True
@@ -246,6 +301,7 @@ class SampleInfo(Horizontal):
         self.query_one(Collapsible).title = title
         sandboxes = self.query_one(SandboxesView)
         await sandboxes.sync_sample(sample)
+        await self.query_one(SampleVNC).sync_sample(sample)
 
 
 class SampleLimits(Widget):
@@ -295,6 +351,9 @@ class SandboxesView(Vertical):
         background: transparent;
         height: auto;
     }
+    #sandboxes-list {
+        height: auto;
+    }
     SandboxesView Static {
         background: transparent;
     }
@@ -312,16 +371,24 @@ class SandboxesView(Vertical):
 
     async def sync_sample(self, sample: ActiveSample) -> None:
         if len(sample.sandboxes) > 0:
+            multiple_sandboxes = len(sample.sandboxes) > 1
             self.display = True
             sandboxes_caption = cast(Static, self.query_one("#sandboxes-caption"))
-            sandboxes_caption.update("[bold]sandbox containers:[/bold]")
+            sandboxes_caption.update(
+                f"[bold]sandbox container{'s' if multiple_sandboxes else ''}:[/bold]"
+            )
 
             sandboxes_list = self.query_one("#sandboxes-list")
             await sandboxes_list.remove_children()
+
             await sandboxes_list.mount_all(
-                [
+                [
+                    SandboxView(connection, name if multiple_sandboxes else None)
+                    for name, connection in sample.sandboxes.items()
+                ]
             )
-            await sandboxes_list.mount(
+
+            await sandboxes_list.mount(
                 Static(
                     "[italic]Hold down Alt (or Option) to select text for copying[/italic]",
                     classes="clipboard-message",
@@ -346,7 +413,7 @@ class SampleToolbar(Horizontal):
         grid-columns: auto auto 1fr auto auto;
     }}
     SampleToolbar #{STATUS_GROUP} {{
-
+        width: 22;
     }}
     SampleToolbar Button {{
         margin-bottom: 1;