inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/__init__.py
CHANGED
@@ -7,7 +7,7 @@ from inspect_ai._eval.evalset import eval_set
 from inspect_ai._eval.list import list_tasks
 from inspect_ai._eval.registry import task
 from inspect_ai._eval.score import score, score_async
-from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks
+from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks, task_with
 from inspect_ai._util.constants import PKG_NAME
 from inspect_ai.solver._human_agent.agent import human_agent
 
@@ -29,4 +29,5 @@ __all__ = [
     "TaskInfo",
     "Tasks",
     "task",
+    "task_with",
 ]
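Note: the newly exported task_with derives a variant of an existing task without redefining it. A minimal sketch, assuming task_with(task, ...) accepts the same keyword attributes as Task (e.g. name, solver) and inherits anything unspecified from the original:

from inspect_ai import Task, task, task_with
from inspect_ai.dataset import example_dataset
from inspect_ai.scorer import model_graded_fact
from inspect_ai.solver import generate, system_message


@task
def security_guide() -> Task:
    return Task(
        dataset=example_dataset("security_guide"),
        solver=[system_message("You are a security expert."), generate()],
        scorer=model_graded_fact(),
    )


# derive a variant, overriding only the name and solver (the keyword
# names here are assumptions, not confirmed by this diff)
verbose_guide = task_with(
    security_guide(),
    name="security_guide_verbose",
    solver=[system_message("Answer in exhaustive detail."), generate()],
)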
inspect_ai/_cli/common.py
CHANGED
@@ -2,6 +2,7 @@ import functools
 from typing import Any, Callable, Literal, cast
 
 import click
+import rich
 from typing_extensions import TypedDict
 
 from inspect_ai._util.constants import (
@@ -17,7 +18,7 @@ class CommonOptions(TypedDict):
     log_level: str
     log_level_transcript: str
     log_dir: str
-    display: Literal["full", "rich", "plain", "none"]
+    display: Literal["full", "conversation", "rich", "plain", "none"]
     no_ansi: bool | None
     debug: bool
     debug_port: int
@@ -64,7 +65,9 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     )
     @click.option(
         "--display",
-        type=click.Choice(["full", "rich", "plain", "none"], case_sensitive=False),
+        type=click.Choice(
+            ["full", "conversation", "rich", "plain", "none"], case_sensitive=False
+        ),
         default=DEFAULT_DISPLAY,
         envvar="INSPECT_DISPLAY",
         help="Set the display type (defaults to 'full')",
@@ -103,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
 def process_common_options(options: CommonOptions) -> None:
     # propagate display
     if options["no_ansi"]:
-        display = "plain"
+        display = "rich"
+        rich.reconfigure(no_color=True)
     else:
         display = options["display"].lower().strip()
     init_display_type(display)
inspect_ai/_cli/eval.py
CHANGED
@@ -118,6 +118,7 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--trace",
         type=bool,
         is_flag=True,
+        hidden=True,
         envvar="INSPECT_EVAL_TRACE",
         help="Trace message interactions with evaluated model to terminal.",
     )
@@ -347,13 +348,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--logprobs",
         type=bool,
         is_flag=True,
-        help="Return log probabilities of the output tokens. OpenAI, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
+        help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
        envvar="INSPECT_EVAL_LOGPROBS",
     )
     @click.option(
         "--top-logprobs",
         type=int,
-        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, TogetherAI, Huggingface, and vLLM only.",
+        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
         envvar="INSPECT_EVAL_TOP_LOGPROBS",
     )
     @click.option(
@@ -364,6 +365,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
         envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
     )
+    @click.option(
+        "--internal-tools/--no-internal-tools",
+        type=bool,
+        is_flag=True,
+        default=True,
+        help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
+        envvar="INSPECT_EVAL_INTERNAL_TOOLS",
+    )
     @click.option(
         "--max-tool-output",
         type=int,
@@ -438,6 +447,7 @@ def eval_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -597,6 +607,7 @@ def eval_set_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -835,6 +846,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
         if key == "parallel_tool_calls":
             if value is not False:
                 value = None
+        if key == "internal_tools":
+            if value is not False:
+                value = None
         config[key] = value  # type: ignore
     return config
 
@@ -886,6 +900,7 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
     "--trace",
     type=bool,
     is_flag=True,
+    hidden=True,
     help="Trace message interactions with evaluated model to terminal.",
     envvar="INSPECT_EVAL_TRACE",
 )
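Note: the new --internal-tools/--no-internal-tools flag is plumbed into GenerateConfigArgs above, so the same setting should be reachable from the Python API. A hedged sketch, assuming the flag surfaces as an internal_tools field on GenerateConfig (the diff to _generate_config.py suggests this, but the Python-side field name is inferred):

from inspect_ai.model import GenerateConfig, get_model

# disable automatic mapping of tools (e.g. 'computer') onto a model's
# native implementation; field name inferred from the CLI plumbing above
model = get_model(
    "anthropic/claude-3-5-sonnet-latest",
    config=GenerateConfig(internal_tools=False),
)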
inspect_ai/_cli/trace.py
CHANGED
@@ -62,11 +62,21 @@ def list_command(json: bool) -> None:
 
 @trace_command.command("dump")
 @click.argument("trace-file", type=str, required=False)
-def dump_command(trace_file: str | None) -> None:
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
+def dump_command(trace_file: str | None, filter: str | None) -> None:
     """Dump a trace file to stdout (as a JSON array of log records)."""
     trace_file_path = _resolve_trace_file_path(trace_file)
 
     traces = read_trace_file(trace_file_path)
+
+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     print(
         to_json(traces, indent=2, exclude_none=True, fallback=lambda _: None).decode()
     )
@@ -74,17 +84,26 @@ def dump_command(trace_file: str | None) -> None:
 
 @trace_command.command("anomalies")
 @click.argument("trace-file", type=str, required=False)
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
 @click.option(
     "--all",
     is_flag=True,
     default=False,
     help="Show all anomolies including errors and timeouts (by default only still running and cancelled actions are shown).",
 )
-def anomolies_command(trace_file: str | None, all: bool) -> None:
+def anomolies_command(trace_file: str | None, filter: str | None, all: bool) -> None:
     """Look for anomalies in a trace file (never completed or cancelled actions)."""
     trace_file_path = _resolve_trace_file_path(trace_file)
     traces = read_trace_file(trace_file_path)
 
+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     # Track started actions
     running_actions: dict[str, ActionTraceRecord] = {}
     canceled_actions: dict[str, ActionTraceRecord] = {}
inspect_ai/_display/core/active.py
CHANGED
@@ -4,8 +4,8 @@ from contextvars import ContextVar
 import rich
 
 from inspect_ai.util._display import display_type
-from inspect_ai.util._trace import trace_enabled
 
+from ..plain.display import PlainDisplay
 from ..rich.display import RichDisplay
 from ..textual.display import TextualDisplay
 from .display import Display, TaskScreen
@@ -14,10 +14,11 @@ from .display import Display, TaskScreen
 def display() -> Display:
     global _active_display
     if _active_display is None:
-        if (
+        if display_type() == "plain":
+            _active_display = PlainDisplay()
+        elif (
             display_type() == "full"
             and sys.stdout.isatty()
-            and not trace_enabled()
             and not rich.get_console().is_jupyter
         ):
             _active_display = TextualDisplay()
inspect_ai/_display/core/config.py
CHANGED
@@ -13,14 +13,14 @@ def task_config(
         value = task_args[key]
         if is_registry_dict(value):
             task_args[key] = value["name"]
-    config = profile.eval_config.model_dump(exclude_none=True) | task_args
+    config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
     if generate_config:
-        config = profile.generate_config.model_dump(exclude_none=True) | config
+        config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
     if profile.tags:
         config["tags"] = ",".join(profile.tags)
     config_print: list[str] = []
     for name, value in config.items():
-        if name == "approval":
+        if name == "approval" and isinstance(value, dict):
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
inspect_ai/_display/core/panel.py
CHANGED
@@ -50,9 +50,13 @@
     table.add_row(subtitle_table)
 
     # main progress and task info
-    table.add_row()
-    table.add_row(body)
-    table.add_row()
+    if body:
+        table.add_row()
+        table.add_row(body)
+
+    # spacing if there is more ocontent
+    if footer or log_location:
+        table.add_row()
 
     # footer if specified
     if footer:
inspect_ai/_display/plain/__init__.py
ADDED
File without changes
inspect_ai/_display/plain/display.py
ADDED
@@ -0,0 +1,203 @@
+import asyncio
+import contextlib
+from typing import Any, AsyncIterator, Coroutine, Iterator
+
+import rich
+
+from inspect_ai._display.core.rich import rich_initialise
+from inspect_ai._util.text import truncate
+from inspect_ai._util.throttle import throttle
+
+from ...util._concurrency import concurrency_status
+from ..core.config import task_config
+from ..core.display import (
+    TR,
+    Display,
+    Progress,
+    TaskDisplay,
+    TaskDisplayMetric,
+    TaskProfile,
+    TaskResult,
+    TaskScreen,
+    TaskSpec,
+    TaskWithResult,
+)
+from ..core.footer import task_http_rate_limits
+from ..core.panel import task_panel, task_targets
+from ..core.results import task_metric, tasks_results
+
+
+class PlainDisplay(Display):
+    def __init__(self) -> None:
+        self.total_tasks: int = 0
+        self.tasks: list[TaskWithResult] = []
+        self.parallel = False
+        rich_initialise()
+
+    def print(self, message: str) -> None:
+        print(message)
+
+    @contextlib.contextmanager
+    def progress(self, total: int) -> Iterator[Progress]:
+        yield PlainProgress(total)
+
+    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
+        return asyncio.run(main)
+
+    @contextlib.contextmanager
+    def suspend_task_app(self) -> Iterator[None]:
+        yield
+
+    @contextlib.asynccontextmanager
+    async def task_screen(
+        self, tasks: list[TaskSpec], parallel: bool
+    ) -> AsyncIterator[TaskScreen]:
+        self.total_tasks = len(tasks)
+        self.multiple_task_names = len({task.name for task in tasks}) > 1
+        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
+        self.tasks = []
+        self.parallel = parallel
+        try:
+            # Print header for task(s)
+            if parallel:
+                print(f"Running {self.total_tasks} tasks...")
+            yield TaskScreen()
+        finally:
+            # Print final results
+            if self.tasks:
+                self._print_results()
+
+    @contextlib.contextmanager
+    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
+        # Print initial task information using a rich panel
+        panel = task_panel(
+            profile=profile,
+            show_model=True,
+            body="",  # Empty body since we haven't started yet
+            subtitle=(task_config(profile), task_targets(profile)),
+            footer=None,
+            log_location=None,
+        )
+        rich.print(panel)
+
+        # Create and yield task display
+        task = TaskWithResult(profile, None)
+        self.tasks.append(task)
+        yield PlainTaskDisplay(
+            task,
+            show_task_names=self.multiple_task_names,
+            show_model_names=self.multiple_model_names,
+        )
+
+    def _print_results(self) -> None:
+        """Print final results using rich panels"""
+        panels = tasks_results(self.tasks)
+        rich.print(panels)
+
+
+class PlainProgress(Progress):
+    def __init__(self, total: int):
+        self.total = total
+        self.current = 0
+
+    def update(self, n: int = 1) -> None:
+        self.current += n
+        # No direct printing - PlainTaskDisplay handles it
+
+    def complete(self) -> None:
+        self.current = self.total
+
+
+class PlainTaskDisplay(TaskDisplay):
+    def __init__(
+        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
+    ):
+        self.task = task
+        self.show_task_names = show_task_names
+        self.show_model_names = show_model_names
+        self.progress_display: PlainProgress | None = None
+        self.samples_complete = 0
+        self.samples_total = 0
+        self.current_metrics: list[TaskDisplayMetric] | None = None
+        self.last_progress = 0  # Track last progress percentage
+
+    @contextlib.contextmanager
+    def progress(self) -> Iterator[Progress]:
+        self.progress_display = PlainProgress(self.task.profile.steps)
+        yield self.progress_display
+
+    @throttle(1)
+    def _print_status_throttled(self) -> None:
+        self._print_status()
+
+    def _print_status(self) -> None:
+        """Print status updates on new lines when there's meaningful progress"""
+        if not self.progress_display:
+            return
+
+        # Calculate current progress percentage
+        current_progress = int(
+            self.progress_display.current / self.progress_display.total * 100
+        )
+
+        # Only print on percentage changes to avoid too much output
+        if current_progress != self.last_progress:
+            status_parts: list[str] = []
+
+            # if this is parallel print task and model to distinguish (limit both to 12 chars)
+            MAX_NAME_WIDTH = 12
+            if self.show_task_names:
+                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
+            if self.show_model_names:
+                status_parts.append(
+                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
+                )
+
+            # Add step progress
+            status_parts.append(
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+            )
+
+            # Add sample progress
+            status_parts.append(
+                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
+            )
+
+            # Add metrics
+            if self.current_metrics:
+                metric_str = task_metric(self.current_metrics)
+                status_parts.append(metric_str)
+
+            # Add resource usage
+            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
+            # the rich formatting added in the ``task_dict`` call
+            resources_dict: dict[str, str] = {}
+            for model, resource in concurrency_status().items():
+                resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
+            resources = ", ".join(
+                [f"{key}: {value}" for key, value in resources_dict.items()]
+            )
+            status_parts.append(resources)
+
+            # Add rate limits
+            rate_limits = task_http_rate_limits()
+            if rate_limits:
+                status_parts.append(rate_limits)
+
+            # Print on new line
+            print(" | ".join(status_parts))
+
+            self.last_progress = current_progress
+
+    def sample_complete(self, complete: int, total: int) -> None:
+        self.samples_complete = complete
+        self.samples_total = total
+        self._print_status_throttled()
+
+    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
+        self.current_metrics = metrics
+        self._print_status_throttled()
+
+    def complete(self, result: TaskResult) -> None:
+        self.task.result = result
+        self._print_status()
inspect_ai/_display/rich/display.py
CHANGED
@@ -15,7 +15,6 @@ from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH
 from inspect_ai.log._transcript import InputEvent, transcript
 from inspect_ai.util._display import display_type
 from inspect_ai.util._throttle import throttle
-from inspect_ai.util._trace import trace_enabled
 
 from ..core.config import task_config
 from ..core.display import (
@@ -130,11 +129,6 @@ class RichDisplay(Display):
     @override
     @contextlib.contextmanager
     def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
-        # if there is no ansi display than all of the below will
-        # be a no-op, so we print a simple text message for the task
-        if display_type() == "plain":
-            rich.get_console().print(task_no_ansi(profile))
-
         # for typechekcer
         if self.tasks is None:
             self.tasks = []
@@ -151,7 +145,8 @@
     @throttle(1)
     def _update_display(self) -> None:
         if (
-            self.tasks is not None
+            display_type() != "conversation"
+            and self.tasks is not None
             and self.tasks
             and self.progress_ui is not None
             and self.live is not None
@@ -170,7 +165,7 @@ class RichTaskScreen(TaskScreen):
     def __init__(self, live: Live) -> None:
         self.theme = rich_theme()
         self.live = live
-        status_text = "Working" if trace_enabled() else "Task running"
+        status_text = "Working" if display_type() == "conversation" else "Task running"
         self.status = self.live.console.status(
             f"[{self.theme.meta} bold]{status_text}...[/{self.theme.meta} bold]",
             spinner="clock",
@@ -189,7 +184,7 @@
     ) -> Iterator[Console]:
         # determine transient based on trace mode
         if transient is None:
-            transient = not trace_enabled()
+            transient = display_type() != "conversation"
 
         # clear live task status and transient status
         self.live.update("", refresh=True)
inspect_ai/_display/textual/app.py
CHANGED
@@ -284,7 +284,10 @@ class TaskScreenApp(App[TR]):
 
     def update_samples(self) -> None:
         samples_view = self.query_one(SamplesView)
-        samples_view.set_samples(active_samples())
+        active_and_started_samples = [
+            sample for sample in active_samples() if sample.started is not None
+        ]
+        samples_view.set_samples(active_and_started_samples)
 
     def update_footer(self) -> None:
         left, right = task_footer()
inspect_ai/_display/textual/widgets/port_mappings.py
ADDED
@@ -0,0 +1,110 @@
+from typing import Literal
+
+from textual.app import ComposeResult
+from textual.containers import HorizontalScroll
+from textual.widget import Widget
+from textual.widgets import Link, Static
+
+from inspect_ai._util.port_names import get_service_by_port
+from inspect_ai.util._sandbox.environment import PortMapping
+
+
+class PortMappingsView(HorizontalScroll):
+    DEFAULT_CSS = """
+    PortMappingsView {
+        layout: grid;
+        height: auto;
+        grid-size: 4 3;
+        grid-columns: auto auto auto auto;
+        grid-gutter: 0 1;
+    }
+    """
+
+    def __init__(self, ports: list[PortMapping] | None) -> None:
+        super().__init__()
+        self.ports = ports
+
+    def compose(self) -> ComposeResult:
+        if not self.ports:
+            return
+        yield Static("service")
+        yield Static("sandbox")
+        yield Static("client")
+        yield Static("endpoint")
+        mappings_and_services = [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in self.ports
+        ]
+        remaining_widgets = [
+            widget
+            for mapping_and_service in mappings_and_services
+            for widget in widgets_from_port_mapping(mapping_and_service)
+        ]
+        for widget in remaining_widgets:
+            yield widget
+
+
+def widgets_for_port_mappings(
+    port_mappings: list[PortMapping] | None,
+) -> list[Widget]:
+    if port_mappings is None:
+        return []
+    return [
+        static
+        for mapping in [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in port_mappings
+        ]
+        for static in widgets_from_port_mapping(mapping)
+    ]
+
+
+def widgets_from_port_mapping(
+    mapping_service_tuple: tuple[PortMapping, str | None],
+) -> list[Widget]:
+    port_mapping, service = mapping_service_tuple
+    return [
+        widget
+        for host_mapping in port_mapping.mappings
+        for widget in get_row_widgets(
+            port_mapping.protocol,
+            host_mapping.host_port,
+            port_mapping.container_port,
+            service,
+        )
+    ]
+
+
+def get_row_widgets(
+    protocol: Literal["tcp", "udp"],
+    host_port: int,
+    container_port: int,
+    service: str | None,
+) -> list[Widget]:
+    url = get_url(
+        host_port,
+        service,
+    )
+    return [
+        Static(service if service is not None else protocol),
+        Static(str(container_port)),
+        Static(str(host_port)),
+        Link(url) if url is not None else Static("asdf"),
+    ]
+
+
+def get_url(
+    host_port: int,
+    service: str | None,
+) -> str | None:
+    if service is not None:
+        if service == "noVNC":
+            return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
+
+        if service.startswith("HTTP"):
+            return f"https://localhost:{host_port}"
+
+        if service.startswith("VNC"):
+            return f"vnc://localhost:{host_port}"
+
+    return None