inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (161)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +7 -3
  3. inspect_ai/_cli/eval.py +17 -2
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +4 -3
  6. inspect_ai/_display/core/config.py +3 -3
  7. inspect_ai/_display/core/panel.py +7 -3
  8. inspect_ai/_display/plain/__init__.py +0 -0
  9. inspect_ai/_display/plain/display.py +203 -0
  10. inspect_ai/_display/rich/display.py +4 -9
  11. inspect_ai/_display/textual/app.py +4 -1
  12. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  13. inspect_ai/_display/textual/widgets/samples.py +119 -16
  14. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  15. inspect_ai/_eval/eval.py +32 -20
  16. inspect_ai/_eval/evalset.py +7 -5
  17. inspect_ai/_eval/score.py +1 -0
  18. inspect_ai/_eval/task/__init__.py +2 -2
  19. inspect_ai/_eval/task/images.py +40 -25
  20. inspect_ai/_eval/task/results.py +50 -22
  21. inspect_ai/_eval/task/run.py +180 -124
  22. inspect_ai/_eval/task/sandbox.py +10 -5
  23. inspect_ai/_eval/task/task.py +140 -25
  24. inspect_ai/_util/constants.py +2 -0
  25. inspect_ai/_util/content.py +23 -1
  26. inspect_ai/_util/images.py +20 -17
  27. inspect_ai/_util/kvstore.py +73 -0
  28. inspect_ai/_util/notgiven.py +18 -0
  29. inspect_ai/_util/port_names.py +61 -0
  30. inspect_ai/_util/text.py +23 -0
  31. inspect_ai/_util/thread.py +5 -0
  32. inspect_ai/_view/www/App.css +31 -1
  33. inspect_ai/_view/www/dist/assets/index.css +31 -1
  34. inspect_ai/_view/www/dist/assets/index.js +25375 -1846
  35. inspect_ai/_view/www/log-schema.json +129 -15
  36. inspect_ai/_view/www/package.json +2 -0
  37. inspect_ai/_view/www/src/App.mjs +8 -10
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  40. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  41. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  42. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  43. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  44. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  45. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  46. inspect_ai/_view/www/src/index.js +75 -2
  47. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  48. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  49. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  50. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  51. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  52. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  53. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
  54. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  55. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  56. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +62 -27
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  76. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  77. inspect_ai/_view/www/vite.config.js +7 -0
  78. inspect_ai/_view/www/yarn.lock +116 -0
  79. inspect_ai/approval/_human/__init__.py +0 -0
  80. inspect_ai/approval/_human/util.py +2 -2
  81. inspect_ai/approval/_policy.py +12 -6
  82. inspect_ai/dataset/_sources/csv.py +2 -1
  83. inspect_ai/dataset/_sources/json.py +2 -1
  84. inspect_ai/dataset/_sources/util.py +15 -7
  85. inspect_ai/log/_condense.py +11 -1
  86. inspect_ai/log/_log.py +3 -6
  87. inspect_ai/log/_recorders/eval.py +19 -8
  88. inspect_ai/log/_samples.py +26 -5
  89. inspect_ai/log/_transcript.py +32 -2
  90. inspect_ai/model/__init__.py +10 -2
  91. inspect_ai/model/_call_tools.py +59 -12
  92. inspect_ai/model/_chat_message.py +2 -4
  93. inspect_ai/model/_conversation.py +61 -0
  94. inspect_ai/model/_generate_config.py +10 -4
  95. inspect_ai/model/_model.py +117 -18
  96. inspect_ai/model/_model_output.py +7 -2
  97. inspect_ai/model/_providers/anthropic.py +109 -51
  98. inspect_ai/model/_providers/azureai.py +26 -24
  99. inspect_ai/model/_providers/bedrock.py +43 -44
  100. inspect_ai/model/_providers/google.py +121 -58
  101. inspect_ai/model/_providers/groq.py +7 -5
  102. inspect_ai/model/_providers/hf.py +11 -6
  103. inspect_ai/model/_providers/mistral.py +17 -20
  104. inspect_ai/model/_providers/openai.py +32 -21
  105. inspect_ai/model/_providers/openai_o1.py +9 -8
  106. inspect_ai/model/_providers/providers.py +1 -1
  107. inspect_ai/model/_providers/together.py +8 -8
  108. inspect_ai/model/_providers/vertex.py +18 -8
  109. inspect_ai/scorer/__init__.py +13 -2
  110. inspect_ai/scorer/_metrics/__init__.py +2 -2
  111. inspect_ai/scorer/_metrics/std.py +3 -3
  112. inspect_ai/scorer/_reducer/reducer.py +1 -1
  113. inspect_ai/scorer/_scorer.py +2 -2
  114. inspect_ai/solver/__init__.py +2 -5
  115. inspect_ai/solver/_prompt.py +35 -5
  116. inspect_ai/solver/_task_state.py +80 -38
  117. inspect_ai/tool/__init__.py +11 -1
  118. inspect_ai/tool/_tool.py +21 -3
  119. inspect_ai/tool/_tool_call.py +10 -0
  120. inspect_ai/tool/_tool_def.py +16 -5
  121. inspect_ai/tool/_tool_with.py +21 -4
  122. inspect_ai/tool/beta/__init__.py +5 -0
  123. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  124. inspect_ai/tool/beta/_computer/_common.py +133 -0
  125. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  126. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  127. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  128. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  129. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  130. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  131. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  134. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  135. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  136. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  137. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  138. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  139. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  144. inspect_ai/util/__init__.py +2 -3
  145. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  146. inspect_ai/util/_display.py +14 -4
  147. inspect_ai/util/_limit.py +26 -0
  148. inspect_ai/util/_sandbox/context.py +12 -13
  149. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  150. inspect_ai/util/_sandbox/docker/docker.py +84 -14
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/environment.py +27 -1
  153. inspect_ai/util/_sandbox/local.py +1 -0
  154. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  155. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
  156. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  157. inspect_ai/model/_trace.py +0 -48
  158. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  159. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  160. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  161. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/__init__.py CHANGED
@@ -7,7 +7,7 @@ from inspect_ai._eval.evalset import eval_set
 from inspect_ai._eval.list import list_tasks
 from inspect_ai._eval.registry import task
 from inspect_ai._eval.score import score, score_async
-from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks
+from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks, task_with
 from inspect_ai._util.constants import PKG_NAME
 from inspect_ai.solver._human_agent.agent import human_agent

@@ -29,4 +29,5 @@ __all__ = [
     "TaskInfo",
     "Tasks",
     "task",
+    "task_with",
 ]
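This release exports `task_with` at the top level. As an illustration only (not part of the diff), a minimal sketch of deriving a variant of an existing task, assuming `task_with` accepts the same keyword arguments as the `Task` constructor:

```python
from inspect_ai import Task, task, task_with
from inspect_ai.dataset import Sample

@task
def security_guide() -> Task:
    # trivial single-sample task for illustration
    return Task(dataset=[Sample(input="What is ARP spoofing?")])

# hypothetical: the same task, re-run over 3 epochs under a new name
guide_x3 = task_with(security_guide(), epochs=3, name="security_guide_x3")
```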
inspect_ai/_cli/common.py CHANGED
@@ -2,6 +2,7 @@ import functools
 from typing import Any, Callable, Literal, cast

 import click
+import rich
 from typing_extensions import TypedDict

 from inspect_ai._util.constants import (
@@ -17,7 +18,7 @@ class CommonOptions(TypedDict):
     log_level: str
     log_level_transcript: str
     log_dir: str
-    display: Literal["full", "rich", "plain", "none"]
+    display: Literal["full", "conversation", "rich", "plain", "none"]
     no_ansi: bool | None
     debug: bool
     debug_port: int
@@ -64,7 +65,9 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     )
     @click.option(
         "--display",
-        type=click.Choice(["full", "rich", "plain", "none"], case_sensitive=False),
+        type=click.Choice(
+            ["full", "conversation", "rich", "plain", "none"], case_sensitive=False
+        ),
         default=DEFAULT_DISPLAY,
         envvar="INSPECT_DISPLAY",
         help="Set the display type (defaults to 'full')",
@@ -103,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
 def process_common_options(options: CommonOptions) -> None:
     # propagate display
     if options["no_ansi"]:
-        display = "plain"
+        display = "rich"
+        rich.reconfigure(no_color=True)
     else:
         display = options["display"].lower().strip()
     init_display_type(display)
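The `--display` choice gains a `conversation` mode, and `--no-ansi` now maps to the rich display with color disabled rather than to `plain`. Since the option declares `envvar="INSPECT_DISPLAY"`, the mode can also be selected without the flag; a small sketch (only the environment-variable name is taken from the diff above, the rest is hypothetical usage):

```python
import os

# equivalent to passing --display conversation on the CLI
os.environ["INSPECT_DISPLAY"] = "conversation"

from inspect_ai import eval  # noqa: E402

# eval(security_guide())  # hypothetical task from the earlier sketch
```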
inspect_ai/_cli/eval.py CHANGED
@@ -118,6 +118,7 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--trace",
         type=bool,
         is_flag=True,
+        hidden=True,
         envvar="INSPECT_EVAL_TRACE",
         help="Trace message interactions with evaluated model to terminal.",
     )
@@ -347,13 +348,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--logprobs",
         type=bool,
         is_flag=True,
-        help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
+        help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
         envvar="INSPECT_EVAL_LOGPROBS",
     )
     @click.option(
         "--top-logprobs",
         type=int,
-        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only.",
+        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
         envvar="INSPECT_EVAL_TOP_LOGPROBS",
     )
     @click.option(
@@ -364,6 +365,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
         envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
     )
+    @click.option(
+        "--internal-tools/--no-internal-tools",
+        type=bool,
+        is_flag=True,
+        default=True,
+        help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
+        envvar="INSPECT_EVAL_INTERNAL_TOOLS",
+    )
     @click.option(
         "--max-tool-output",
         type=int,
@@ -438,6 +447,7 @@ def eval_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -597,6 +607,7 @@ def eval_set_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -835,6 +846,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
         if key == "parallel_tool_calls":
            if value is not False:
                value = None
+        if key == "internal_tools":
+            if value is not False:
+                value = None
         config[key] = value  # type: ignore
     return config

@@ -886,6 +900,7 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
     "--trace",
     type=bool,
     is_flag=True,
+    hidden=True,
     help="Trace message interactions with evaluated model to terminal.",
     envvar="INSPECT_EVAL_TRACE",
 )
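The new `--internal-tools/--no-internal-tools` pair is folded into `GenerateConfigArgs` by `config_from_locals` (only an explicit `False` is recorded, since `True` is the default). Presumably the equivalent from the Python API is the matching `GenerateConfig` field added in this release (see `inspect_ai/model/_generate_config.py` in the file list); a hedged sketch:

```python
from inspect_ai.model import GenerateConfig

# assumption: GenerateConfig gains an internal_tools field in 0.3.59;
# False disables mapping tools such as computer() to a provider-native
# implementation (e.g. Anthropic's computer-use tool)
config = GenerateConfig(internal_tools=False)
```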
inspect_ai/_cli/trace.py CHANGED
@@ -62,11 +62,21 @@ def list_command(json: bool) -> None:

 @trace_command.command("dump")
 @click.argument("trace-file", type=str, required=False)
-def dump_command(trace_file: str | None) -> None:
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
+def dump_command(trace_file: str | None, filter: str | None) -> None:
     """Dump a trace file to stdout (as a JSON array of log records)."""
     trace_file_path = _resolve_trace_file_path(trace_file)

     traces = read_trace_file(trace_file_path)
+
+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     print(
         to_json(traces, indent=2, exclude_none=True, fallback=lambda _: None).decode()
     )
@@ -74,17 +84,26 @@ def dump_command(trace_file: str | None) -> None:

 @trace_command.command("anomalies")
 @click.argument("trace-file", type=str, required=False)
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
 @click.option(
     "--all",
     is_flag=True,
     default=False,
     help="Show all anomolies including errors and timeouts (by default only still running and cancelled actions are shown).",
 )
-def anomolies_command(trace_file: str | None, all: bool) -> None:
+def anomolies_command(trace_file: str | None, filter: str | None, all: bool) -> None:
     """Look for anomalies in a trace file (never completed or cancelled actions)."""
     trace_file_path = _resolve_trace_file_path(trace_file)
     traces = read_trace_file(trace_file_path)

+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     # Track started actions
     running_actions: dict[str, ActionTraceRecord] = {}
     canceled_actions: dict[str, ActionTraceRecord] = {}
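Both subcommands apply the same `--filter` semantics: a case-insensitive substring match against each record's `message` field. A self-contained sketch of that behavior (the `TraceRecord` stand-in below is hypothetical; the CLI uses its own record types):

```python
from dataclasses import dataclass

@dataclass
class TraceRecord:
    # stand-in for the real trace record type
    message: str

def filter_traces(traces: list[TraceRecord], filter: str) -> list[TraceRecord]:
    # mirror the CLI: lowercase the filter, keep records whose message contains it
    filter = filter.lower()
    return [trace for trace in traces if filter in trace.message.lower()]

records = [TraceRecord("HTTP POST https://api.openai.com"), TraceRecord("docker compose up")]
assert [t.message for t in filter_traces(records, "http")] == ["HTTP POST https://api.openai.com"]
```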
inspect_ai/_display/core/active.py CHANGED
@@ -4,8 +4,8 @@ from contextvars import ContextVar
 import rich

 from inspect_ai.util._display import display_type
-from inspect_ai.util._trace import trace_enabled

+from ..plain.display import PlainDisplay
 from ..rich.display import RichDisplay
 from ..textual.display import TextualDisplay
 from .display import Display, TaskScreen
@@ -14,10 +14,11 @@ from .display import Display, TaskScreen
 def display() -> Display:
     global _active_display
     if _active_display is None:
-        if (
+        if display_type() == "plain":
+            _active_display = PlainDisplay()
+        elif (
             display_type() == "full"
             and sys.stdout.isatty()
-            and not trace_enabled()
             and not rich.get_console().is_jupyter
         ):
             _active_display = TextualDisplay()
inspect_ai/_display/core/config.py CHANGED
@@ -13,14 +13,14 @@ def task_config(
         value = task_args[key]
         if is_registry_dict(value):
             task_args[key] = value["name"]
-    config = task_args | dict(profile.eval_config.model_dump(exclude_none=True))
+    config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
     if generate_config:
-        config = config | dict(profile.generate_config.model_dump(exclude_none=True))
+        config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
     if profile.tags:
         config["tags"] = ",".join(profile.tags)
     config_print: list[str] = []
     for name, value in config.items():
-        if name == "approval":
+        if name == "approval" and isinstance(value, dict):
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
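The merge-order change matters because dict union (`|`) is right-biased, so keys in the right operand win on conflict. After this change, explicit task args override eval-config values rather than being overridden by them. A one-line illustration (values are made up):

```python
eval_config = {"epochs": 1, "limit": 10}
task_args = {"epochs": 3}

# before: task_args | eval_config  ->  epochs == 1 (config wins)
# after:  eval_config | task_args  ->  epochs == 3 (task args win)
assert (task_args | eval_config)["epochs"] == 1
assert (eval_config | task_args)["epochs"] == 3
```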
inspect_ai/_display/core/panel.py CHANGED
@@ -50,9 +50,13 @@ def task_panel(
         table.add_row(subtitle_table)

     # main progress and task info
-    table.add_row()
-    table.add_row(body)
-    table.add_row()
+    if body:
+        table.add_row()
+        table.add_row(body)
+
+    # spacing if there is more ocontent
+    if footer or log_location:
+        table.add_row()

     # footer if specified
     if footer:
inspect_ai/_display/plain/__init__.py ADDED
File without changes
inspect_ai/_display/plain/display.py ADDED
@@ -0,0 +1,203 @@
+import asyncio
+import contextlib
+from typing import Any, AsyncIterator, Coroutine, Iterator
+
+import rich
+
+from inspect_ai._display.core.rich import rich_initialise
+from inspect_ai._util.text import truncate
+from inspect_ai._util.throttle import throttle
+
+from ...util._concurrency import concurrency_status
+from ..core.config import task_config
+from ..core.display import (
+    TR,
+    Display,
+    Progress,
+    TaskDisplay,
+    TaskDisplayMetric,
+    TaskProfile,
+    TaskResult,
+    TaskScreen,
+    TaskSpec,
+    TaskWithResult,
+)
+from ..core.footer import task_http_rate_limits
+from ..core.panel import task_panel, task_targets
+from ..core.results import task_metric, tasks_results
+
+
+class PlainDisplay(Display):
+    def __init__(self) -> None:
+        self.total_tasks: int = 0
+        self.tasks: list[TaskWithResult] = []
+        self.parallel = False
+        rich_initialise()
+
+    def print(self, message: str) -> None:
+        print(message)
+
+    @contextlib.contextmanager
+    def progress(self, total: int) -> Iterator[Progress]:
+        yield PlainProgress(total)
+
+    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
+        return asyncio.run(main)
+
+    @contextlib.contextmanager
+    def suspend_task_app(self) -> Iterator[None]:
+        yield
+
+    @contextlib.asynccontextmanager
+    async def task_screen(
+        self, tasks: list[TaskSpec], parallel: bool
+    ) -> AsyncIterator[TaskScreen]:
+        self.total_tasks = len(tasks)
+        self.multiple_task_names = len({task.name for task in tasks}) > 1
+        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
+        self.tasks = []
+        self.parallel = parallel
+        try:
+            # Print header for task(s)
+            if parallel:
+                print(f"Running {self.total_tasks} tasks...")
+            yield TaskScreen()
+        finally:
+            # Print final results
+            if self.tasks:
+                self._print_results()
+
+    @contextlib.contextmanager
+    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
+        # Print initial task information using a rich panel
+        panel = task_panel(
+            profile=profile,
+            show_model=True,
+            body="",  # Empty body since we haven't started yet
+            subtitle=(task_config(profile), task_targets(profile)),
+            footer=None,
+            log_location=None,
+        )
+        rich.print(panel)
+
+        # Create and yield task display
+        task = TaskWithResult(profile, None)
+        self.tasks.append(task)
+        yield PlainTaskDisplay(
+            task,
+            show_task_names=self.multiple_task_names,
+            show_model_names=self.multiple_model_names,
+        )
+
+    def _print_results(self) -> None:
+        """Print final results using rich panels"""
+        panels = tasks_results(self.tasks)
+        rich.print(panels)
+
+
+class PlainProgress(Progress):
+    def __init__(self, total: int):
+        self.total = total
+        self.current = 0
+
+    def update(self, n: int = 1) -> None:
+        self.current += n
+        # No direct printing - PlainTaskDisplay handles it
+
+    def complete(self) -> None:
+        self.current = self.total
+
+
+class PlainTaskDisplay(TaskDisplay):
+    def __init__(
+        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
+    ):
+        self.task = task
+        self.show_task_names = show_task_names
+        self.show_model_names = show_model_names
+        self.progress_display: PlainProgress | None = None
+        self.samples_complete = 0
+        self.samples_total = 0
+        self.current_metrics: list[TaskDisplayMetric] | None = None
+        self.last_progress = 0  # Track last progress percentage
+
+    @contextlib.contextmanager
+    def progress(self) -> Iterator[Progress]:
+        self.progress_display = PlainProgress(self.task.profile.steps)
+        yield self.progress_display
+
+    @throttle(1)
+    def _print_status_throttled(self) -> None:
+        self._print_status()
+
+    def _print_status(self) -> None:
+        """Print status updates on new lines when there's meaningful progress"""
+        if not self.progress_display:
+            return
+
+        # Calculate current progress percentage
+        current_progress = int(
+            self.progress_display.current / self.progress_display.total * 100
+        )
+
+        # Only print on percentage changes to avoid too much output
+        if current_progress != self.last_progress:
+            status_parts: list[str] = []
+
+            # if this is parallel print task and model to distinguish (limit both to 12 chars)
+            MAX_NAME_WIDTH = 12
+            if self.show_task_names:
+                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
+            if self.show_model_names:
+                status_parts.append(
+                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
+                )
+
+            # Add step progress
+            status_parts.append(
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+            )
+
+            # Add sample progress
+            status_parts.append(
+                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
+            )
+
+            # Add metrics
+            if self.current_metrics:
+                metric_str = task_metric(self.current_metrics)
+                status_parts.append(metric_str)
+
+            # Add resource usage
+            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
+            # the rich formatting added in the ``task_dict`` call
+            resources_dict: dict[str, str] = {}
+            for model, resource in concurrency_status().items():
+                resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
+            resources = ", ".join(
+                [f"{key}: {value}" for key, value in resources_dict.items()]
+            )
+            status_parts.append(resources)
+
+            # Add rate limits
+            rate_limits = task_http_rate_limits()
+            if rate_limits:
+                status_parts.append(rate_limits)
+
+            # Print on new line
+            print(" | ".join(status_parts))
+
+            self.last_progress = current_progress
+
+    def sample_complete(self, complete: int, total: int) -> None:
+        self.samples_complete = complete
+        self.samples_total = total
+        self._print_status_throttled()
+
+    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
+        self.current_metrics = metrics
+        self._print_status_throttled()
+
+    def complete(self, result: TaskResult) -> None:
+        self.task.result = result
+        self._print_status()
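`PlainTaskDisplay` rate-limits its output in two ways: `@throttle(1)` drops status calls that arrive within a second of the last one, and `_print_status` prints only when the integer percentage changes. A standalone sketch of a throttle decorator with those semantics (the real implementation lives in `inspect_ai._util.throttle` and may differ):

```python
import time
from functools import wraps
from typing import Any, Callable

def throttle(seconds: float) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Drop calls that arrive within `seconds` of the last accepted call."""
    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        last_called = 0.0

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            nonlocal last_called
            now = time.monotonic()
            if now - last_called >= seconds:
                last_called = now
                return func(*args, **kwargs)
            return None  # call dropped

        return wrapper
    return decorator

@throttle(1)
def print_status(pct: int) -> None:
    print(f"progress: {pct}%")
```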
inspect_ai/_display/rich/display.py CHANGED
@@ -15,7 +15,6 @@ from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH
 from inspect_ai.log._transcript import InputEvent, transcript
 from inspect_ai.util._display import display_type
 from inspect_ai.util._throttle import throttle
-from inspect_ai.util._trace import trace_enabled

 from ..core.config import task_config
 from ..core.display import (
@@ -130,11 +129,6 @@ class RichDisplay(Display):
     @override
     @contextlib.contextmanager
     def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
-        # if there is no ansi display than all of the below will
-        # be a no-op, so we print a simple text message for the task
-        if display_type() == "plain":
-            rich.get_console().print(task_no_ansi(profile))
-
         # for typechekcer
         if self.tasks is None:
             self.tasks = []
@@ -151,7 +145,8 @@ class RichDisplay(Display):
     @throttle(1)
     def _update_display(self) -> None:
         if (
-            self.tasks is not None
+            display_type() != "conversation"
+            and self.tasks is not None
             and self.tasks
             and self.progress_ui is not None
             and self.live is not None
@@ -170,7 +165,7 @@ class RichTaskScreen(TaskScreen):
     def __init__(self, live: Live) -> None:
         self.theme = rich_theme()
         self.live = live
-        status_text = "Working" if trace_enabled() else "Task running"
+        status_text = "Working" if display_type() == "conversation" else "Task running"
         self.status = self.live.console.status(
             f"[{self.theme.meta} bold]{status_text}...[/{self.theme.meta} bold]",
             spinner="clock",
@@ -189,7 +184,7 @@ class RichTaskScreen(TaskScreen):
     ) -> Iterator[Console]:
         # determine transient based on trace mode
         if transient is None:
-            transient = not trace_enabled()
+            transient = display_type() != "conversation"

         # clear live task status and transient status
         self.live.update("", refresh=True)
inspect_ai/_display/textual/app.py CHANGED
@@ -284,7 +284,10 @@

     def update_samples(self) -> None:
         samples_view = self.query_one(SamplesView)
-        samples_view.set_samples(active_samples())
+        active_and_started_samples = [
+            sample for sample in active_samples() if sample.started is not None
+        ]
+        samples_view.set_samples(active_and_started_samples)

     def update_footer(self) -> None:
         left, right = task_footer()
inspect_ai/_display/textual/widgets/port_mappings.py ADDED
@@ -0,0 +1,110 @@
+from typing import Literal
+
+from textual.app import ComposeResult
+from textual.containers import HorizontalScroll
+from textual.widget import Widget
+from textual.widgets import Link, Static
+
+from inspect_ai._util.port_names import get_service_by_port
+from inspect_ai.util._sandbox.environment import PortMapping
+
+
+class PortMappingsView(HorizontalScroll):
+    DEFAULT_CSS = """
+    PortMappingsView {
+        layout: grid;
+        height: auto;
+        grid-size: 4 3;
+        grid-columns: auto auto auto auto;
+        grid-gutter: 0 1;
+    }
+    """
+
+    def __init__(self, ports: list[PortMapping] | None) -> None:
+        super().__init__()
+        self.ports = ports
+
+    def compose(self) -> ComposeResult:
+        if not self.ports:
+            return
+        yield Static("service")
+        yield Static("sandbox")
+        yield Static("client")
+        yield Static("endpoint")
+        mappings_and_services = [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in self.ports
+        ]
+        remaining_widgets = [
+            widget
+            for mapping_and_service in mappings_and_services
+            for widget in widgets_from_port_mapping(mapping_and_service)
+        ]
+        for widget in remaining_widgets:
+            yield widget
+
+
+def widgets_for_port_mappings(
+    port_mappings: list[PortMapping] | None,
+) -> list[Widget]:
+    if port_mappings is None:
+        return []
+    return [
+        static
+        for mapping in [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in port_mappings
+        ]
+        for static in widgets_from_port_mapping(mapping)
+    ]
+
+
+def widgets_from_port_mapping(
+    mapping_service_tuple: tuple[PortMapping, str | None],
+) -> list[Widget]:
+    port_mapping, service = mapping_service_tuple
+    return [
+        widget
+        for host_mapping in port_mapping.mappings
+        for widget in get_row_widgets(
+            port_mapping.protocol,
+            host_mapping.host_port,
+            port_mapping.container_port,
+            service,
+        )
+    ]
+
+
+def get_row_widgets(
+    protocol: Literal["tcp", "udp"],
+    host_port: int,
+    container_port: int,
+    service: str | None,
+) -> list[Widget]:
+    url = get_url(
+        host_port,
+        service,
+    )
+    return [
+        Static(service if service is not None else protocol),
+        Static(str(container_port)),
+        Static(str(host_port)),
+        Link(url) if url is not None else Static("asdf"),
+    ]
+
+
+def get_url(
+    host_port: int,
+    service: str | None,
+) -> str | None:
+    if service is not None:
+        if service == "noVNC":
+            return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
+
+        if service.startswith("HTTP"):
+            return f"https://localhost:{host_port}"
+
+        if service.startswith("VNC"):
+            return f"vnc://localhost:{host_port}"
+
+    return None
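For reference, `get_url` above only synthesizes clickable endpoints for services it recognizes from the port mapping; everything else renders without a link. The asserts below simply restate its branches (they assume `get_url` as defined above is in scope):

```python
assert get_url(6080, "noVNC") == (
    "http://localhost:6080?view_only=true&autoconnect=true&resize=scale"
)
assert get_url(8080, "HTTP") == "https://localhost:8080"  # note: https scheme
assert get_url(5900, "VNC") == "vnc://localhost:5900"
assert get_url(1234, None) is None  # no recognized service, no link
```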