inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/common.py CHANGED
@@ -2,6 +2,7 @@ import functools
2
2
  from typing import Any, Callable, Literal, cast
3
3
 
4
4
  import click
5
+ import rich
5
6
  from typing_extensions import TypedDict
6
7
 
7
8
  from inspect_ai._util.constants import (
@@ -105,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
105
106
  def process_common_options(options: CommonOptions) -> None:
106
107
  # propagate display
107
108
  if options["no_ansi"]:
108
- display = "plain"
109
+ display = "rich"
110
+ rich.reconfigure(no_color=True)
109
111
  else:
110
112
  display = options["display"].lower().strip()
111
113
  init_display_type(display)
inspect_ai/_cli/eval.py CHANGED
@@ -314,12 +314,6 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
314
314
  help="Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
315
315
  envvar="INSPECT_EVAL_STOP_SEQS",
316
316
  )
317
- @click.option(
318
- "--suffix",
319
- type=str,
320
- help="The suffix that comes after a completion of inserted text. OpenAI only.",
321
- envvar="INSPECT_EVAL_SUFFIX",
322
- )
323
317
  @click.option(
324
318
  "--temperature",
325
319
  type=float,
@@ -348,13 +342,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
348
342
  "--logprobs",
349
343
  type=bool,
350
344
  is_flag=True,
351
- help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
345
+ help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
352
346
  envvar="INSPECT_EVAL_LOGPROBS",
353
347
  )
354
348
  @click.option(
355
349
  "--top-logprobs",
356
350
  type=int,
357
- help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only.",
351
+ help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
358
352
  envvar="INSPECT_EVAL_TOP_LOGPROBS",
359
353
  )
360
354
  @click.option(
@@ -365,6 +359,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
365
359
  help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
366
360
  envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
367
361
  )
362
+ @click.option(
363
+ "--internal-tools/--no-internal-tools",
364
+ type=bool,
365
+ is_flag=True,
366
+ default=True,
367
+ help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
368
+ envvar="INSPECT_EVAL_INTERNAL_TOOLS",
369
+ )
368
370
  @click.option(
369
371
  "--max-tool-output",
370
372
  type=int,
@@ -431,7 +433,6 @@ def eval_command(
431
433
  logit_bias: str | None,
432
434
  seed: int | None,
433
435
  stop_seqs: str | None,
434
- suffix: str | None,
435
436
  temperature: float | None,
436
437
  top_p: float | None,
437
438
  top_k: int | None,
@@ -439,6 +440,7 @@ def eval_command(
439
440
  logprobs: bool | None,
440
441
  top_logprobs: int | None,
441
442
  parallel_tool_calls: bool | None,
443
+ internal_tools: bool | None,
442
444
  max_tool_output: int | None,
443
445
  cache_prompt: str | None,
444
446
  reasoning_effort: str | None,
@@ -598,6 +600,7 @@ def eval_set_command(
598
600
  logprobs: bool | None,
599
601
  top_logprobs: int | None,
600
602
  parallel_tool_calls: bool | None,
603
+ internal_tools: bool | None,
601
604
  max_tool_output: int | None,
602
605
  cache_prompt: str | None,
603
606
  reasoning_effort: str | None,
@@ -836,6 +839,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
836
839
  if key == "parallel_tool_calls":
837
840
  if value is not False:
838
841
  value = None
842
+ if key == "internal_tools":
843
+ if value is not False:
844
+ value = None
839
845
  config[key] = value # type: ignore
840
846
  return config
841
847
 
@@ -5,6 +5,7 @@ import rich
5
5
 
6
6
  from inspect_ai.util._display import display_type
7
7
 
8
+ from ..plain.display import PlainDisplay
8
9
  from ..rich.display import RichDisplay
9
10
  from ..textual.display import TextualDisplay
10
11
  from .display import Display, TaskScreen
@@ -13,7 +14,9 @@ from .display import Display, TaskScreen
13
14
  def display() -> Display:
14
15
  global _active_display
15
16
  if _active_display is None:
16
- if (
17
+ if display_type() == "plain":
18
+ _active_display = PlainDisplay()
19
+ elif (
17
20
  display_type() == "full"
18
21
  and sys.stdout.isatty()
19
22
  and not rich.get_console().is_jupyter
@@ -13,14 +13,14 @@ def task_config(
13
13
  value = task_args[key]
14
14
  if is_registry_dict(value):
15
15
  task_args[key] = value["name"]
16
- config = task_args | dict(profile.eval_config.model_dump(exclude_none=True))
16
+ config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
17
17
  if generate_config:
18
- config = config | dict(profile.generate_config.model_dump(exclude_none=True))
18
+ config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
19
19
  if profile.tags:
20
20
  config["tags"] = ",".join(profile.tags)
21
21
  config_print: list[str] = []
22
22
  for name, value in config.items():
23
- if name == "approval":
23
+ if name == "approval" and isinstance(value, dict):
24
24
  config_print.append(
25
25
  f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
26
26
  )
@@ -50,9 +50,13 @@ def task_panel(
50
50
  table.add_row(subtitle_table)
51
51
 
52
52
  # main progress and task info
53
- table.add_row()
54
- table.add_row(body)
55
- table.add_row()
53
+ if body:
54
+ table.add_row()
55
+ table.add_row(body)
56
+
57
+ # spacing if there is more content
58
+ if footer or log_location:
59
+ table.add_row()
56
60
 
57
61
  # footer if specified
58
62
  if footer:
File without changes
@@ -0,0 +1,203 @@
1
+ import asyncio
2
+ import contextlib
3
+ from typing import Any, AsyncIterator, Coroutine, Iterator
4
+
5
+ import rich
6
+
7
+ from inspect_ai._display.core.rich import rich_initialise
8
+ from inspect_ai._util.text import truncate
9
+ from inspect_ai._util.throttle import throttle
10
+
11
+ from ...util._concurrency import concurrency_status
12
+ from ..core.config import task_config
13
+ from ..core.display import (
14
+ TR,
15
+ Display,
16
+ Progress,
17
+ TaskDisplay,
18
+ TaskDisplayMetric,
19
+ TaskProfile,
20
+ TaskResult,
21
+ TaskScreen,
22
+ TaskSpec,
23
+ TaskWithResult,
24
+ )
25
+ from ..core.footer import task_http_rate_limits
26
+ from ..core.panel import task_panel, task_targets
27
+ from ..core.results import task_metric, tasks_results
28
+
29
+
30
+ class PlainDisplay(Display):
31
+ def __init__(self) -> None:
32
+ self.total_tasks: int = 0
33
+ self.tasks: list[TaskWithResult] = []
34
+ self.parallel = False
35
+ rich_initialise()
36
+
37
+ def print(self, message: str) -> None:
38
+ print(message)
39
+
40
+ @contextlib.contextmanager
41
+ def progress(self, total: int) -> Iterator[Progress]:
42
+ yield PlainProgress(total)
43
+
44
+ def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
45
+ return asyncio.run(main)
46
+
47
+ @contextlib.contextmanager
48
+ def suspend_task_app(self) -> Iterator[None]:
49
+ yield
50
+
51
+ @contextlib.asynccontextmanager
52
+ async def task_screen(
53
+ self, tasks: list[TaskSpec], parallel: bool
54
+ ) -> AsyncIterator[TaskScreen]:
55
+ self.total_tasks = len(tasks)
56
+ self.multiple_task_names = len({task.name for task in tasks}) > 1
57
+ self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
58
+ self.tasks = []
59
+ self.parallel = parallel
60
+ try:
61
+ # Print header for task(s)
62
+ if parallel:
63
+ print(f"Running {self.total_tasks} tasks...")
64
+ yield TaskScreen()
65
+ finally:
66
+ # Print final results
67
+ if self.tasks:
68
+ self._print_results()
69
+
70
+ @contextlib.contextmanager
71
+ def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
72
+ # Print initial task information using a rich panel
73
+ panel = task_panel(
74
+ profile=profile,
75
+ show_model=True,
76
+ body="", # Empty body since we haven't started yet
77
+ subtitle=(task_config(profile), task_targets(profile)),
78
+ footer=None,
79
+ log_location=None,
80
+ )
81
+ rich.print(panel)
82
+
83
+ # Create and yield task display
84
+ task = TaskWithResult(profile, None)
85
+ self.tasks.append(task)
86
+ yield PlainTaskDisplay(
87
+ task,
88
+ show_task_names=self.multiple_task_names,
89
+ show_model_names=self.multiple_model_names,
90
+ )
91
+
92
+ def _print_results(self) -> None:
93
+ """Print final results using rich panels"""
94
+ panels = tasks_results(self.tasks)
95
+ rich.print(panels)
96
+
97
+
98
+ class PlainProgress(Progress):
99
+ def __init__(self, total: int):
100
+ self.total = total
101
+ self.current = 0
102
+
103
+ def update(self, n: int = 1) -> None:
104
+ self.current += n
105
+ # No direct printing - PlainTaskDisplay handles it
106
+
107
+ def complete(self) -> None:
108
+ self.current = self.total
109
+
110
+
111
+ class PlainTaskDisplay(TaskDisplay):
112
+ def __init__(
113
+ self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
114
+ ):
115
+ self.task = task
116
+ self.show_task_names = show_task_names
117
+ self.show_model_names = show_model_names
118
+ self.progress_display: PlainProgress | None = None
119
+ self.samples_complete = 0
120
+ self.samples_total = 0
121
+ self.current_metrics: list[TaskDisplayMetric] | None = None
122
+ self.last_progress = 0 # Track last progress percentage
123
+
124
+ @contextlib.contextmanager
125
+ def progress(self) -> Iterator[Progress]:
126
+ self.progress_display = PlainProgress(self.task.profile.steps)
127
+ yield self.progress_display
128
+
129
+ @throttle(1)
130
+ def _print_status_throttled(self) -> None:
131
+ self._print_status()
132
+
133
+ def _print_status(self) -> None:
134
+ """Print status updates on new lines when there's meaningful progress"""
135
+ if not self.progress_display:
136
+ return
137
+
138
+ # Calculate current progress percentage
139
+ current_progress = int(
140
+ self.progress_display.current / self.progress_display.total * 100
141
+ )
142
+
143
+ # Only print on percentage changes to avoid too much output
144
+ if current_progress != self.last_progress:
145
+ status_parts: list[str] = []
146
+
147
+ # if this is parallel print task and model to distinguish (limit both to 12 chars)
148
+ MAX_NAME_WIDTH = 12
149
+ if self.show_task_names:
150
+ status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
151
+ if self.show_model_names:
152
+ status_parts.append(
153
+ truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
154
+ )
155
+
156
+ # Add step progress
157
+ status_parts.append(
158
+ f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
159
+ )
160
+
161
+ # Add sample progress
162
+ status_parts.append(
163
+ f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
164
+ )
165
+
166
+ # Add metrics
167
+ if self.current_metrics:
168
+ metric_str = task_metric(self.current_metrics)
169
+ status_parts.append(metric_str)
170
+
171
+ # Add resource usage
172
+ # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
173
+ # the rich formatting added in the ``task_dict`` call
174
+ resources_dict: dict[str, str] = {}
175
+ for model, resource in concurrency_status().items():
176
+ resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
177
+ resources = ", ".join(
178
+ [f"{key}: {value}" for key, value in resources_dict.items()]
179
+ )
180
+ status_parts.append(resources)
181
+
182
+ # Add rate limits
183
+ rate_limits = task_http_rate_limits()
184
+ if rate_limits:
185
+ status_parts.append(rate_limits)
186
+
187
+ # Print on new line
188
+ print(" | ".join(status_parts))
189
+
190
+ self.last_progress = current_progress
191
+
192
+ def sample_complete(self, complete: int, total: int) -> None:
193
+ self.samples_complete = complete
194
+ self.samples_total = total
195
+ self._print_status_throttled()
196
+
197
+ def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
198
+ self.current_metrics = metrics
199
+ self._print_status_throttled()
200
+
201
+ def complete(self, result: TaskResult) -> None:
202
+ self.task.result = result
203
+ self._print_status()
@@ -129,11 +129,6 @@ class RichDisplay(Display):
129
129
  @override
130
130
  @contextlib.contextmanager
131
131
  def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
132
- # if there is no ansi display than all of the below will
133
- # be a no-op, so we print a simple text message for the task
134
- if display_type() == "plain":
135
- rich.get_console().print(task_no_ansi(profile))
136
-
137
132
  # for typechecker
138
133
  if self.tasks is None:
139
134
  self.tasks = []
@@ -0,0 +1,110 @@
1
+ from typing import Literal
2
+
3
+ from textual.app import ComposeResult
4
+ from textual.containers import HorizontalScroll
5
+ from textual.widget import Widget
6
+ from textual.widgets import Link, Static
7
+
8
+ from inspect_ai._util.port_names import get_service_by_port
9
+ from inspect_ai.util._sandbox.environment import PortMapping
10
+
11
+
12
+ class PortMappingsView(HorizontalScroll):
13
+ DEFAULT_CSS = """
14
+ PortMappingsView {
15
+ layout: grid;
16
+ height: auto;
17
+ grid-size: 4 3;
18
+ grid-columns: auto auto auto auto;
19
+ grid-gutter: 0 1;
20
+ }
21
+ """
22
+
23
+ def __init__(self, ports: list[PortMapping] | None) -> None:
24
+ super().__init__()
25
+ self.ports = ports
26
+
27
+ def compose(self) -> ComposeResult:
28
+ if not self.ports:
29
+ return
30
+ yield Static("service")
31
+ yield Static("sandbox")
32
+ yield Static("client")
33
+ yield Static("endpoint")
34
+ mappings_and_services = [
35
+ (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
36
+ for mapping in self.ports
37
+ ]
38
+ remaining_widgets = [
39
+ widget
40
+ for mapping_and_service in mappings_and_services
41
+ for widget in widgets_from_port_mapping(mapping_and_service)
42
+ ]
43
+ for widget in remaining_widgets:
44
+ yield widget
45
+
46
+
47
+ def widgets_for_port_mappings(
48
+ port_mappings: list[PortMapping] | None,
49
+ ) -> list[Widget]:
50
+ if port_mappings is None:
51
+ return []
52
+ return [
53
+ static
54
+ for mapping in [
55
+ (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
56
+ for mapping in port_mappings
57
+ ]
58
+ for static in widgets_from_port_mapping(mapping)
59
+ ]
60
+
61
+
62
+ def widgets_from_port_mapping(
63
+ mapping_service_tuple: tuple[PortMapping, str | None],
64
+ ) -> list[Widget]:
65
+ port_mapping, service = mapping_service_tuple
66
+ return [
67
+ widget
68
+ for host_mapping in port_mapping.mappings
69
+ for widget in get_row_widgets(
70
+ port_mapping.protocol,
71
+ host_mapping.host_port,
72
+ port_mapping.container_port,
73
+ service,
74
+ )
75
+ ]
76
+
77
+
78
+ def get_row_widgets(
79
+ protocol: Literal["tcp", "udp"],
80
+ host_port: int,
81
+ container_port: int,
82
+ service: str | None,
83
+ ) -> list[Widget]:
84
+ url = get_url(
85
+ host_port,
86
+ service,
87
+ )
88
+ return [
89
+ Static(service if service is not None else protocol),
90
+ Static(str(container_port)),
91
+ Static(str(host_port)),
92
+ Link(url) if url is not None else Static("asdf"),
93
+ ]
94
+
95
+
96
+ def get_url(
97
+ host_port: int,
98
+ service: str | None,
99
+ ) -> str | None:
100
+ if service is not None:
101
+ if service == "noVNC":
102
+ return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
103
+
104
+ if service.startswith("HTTP"):
105
+ return f"https://localhost:{host_port}"
106
+
107
+ if service.startswith("VNC"):
108
+ return f"vnc://localhost:{host_port}"
109
+
110
+ return None
@@ -5,29 +5,28 @@ from rich.console import RenderableType
5
5
  from rich.table import Table
6
6
  from rich.text import Text
7
7
  from textual.app import ComposeResult
8
- from textual.containers import (
9
- Horizontal,
10
- HorizontalGroup,
11
- Vertical,
12
- VerticalGroup,
13
- )
8
+ from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalGroup
14
9
  from textual.reactive import reactive
15
10
  from textual.widget import Widget
16
11
  from textual.widgets import (
17
12
  Button,
18
13
  Collapsible,
14
+ Link,
19
15
  LoadingIndicator,
20
16
  OptionList,
21
17
  Static,
22
18
  )
23
19
  from textual.widgets.option_list import Option, Separator
24
20
 
21
+ from inspect_ai._display.textual.widgets.port_mappings import get_url
25
22
  from inspect_ai._util.format import format_progress_time
23
+ from inspect_ai._util.port_names import get_service_by_port
26
24
  from inspect_ai._util.registry import registry_unqualified_name
27
25
  from inspect_ai.log._samples import ActiveSample
28
26
  from inspect_ai.log._transcript import ToolEvent
29
27
 
30
28
  from .clock import Clock
29
+ from .sandbox import SandboxView
31
30
  from .transcript import TranscriptView
32
31
 
33
32
 
@@ -74,6 +73,7 @@ class SamplesView(Widget):
74
73
 
75
74
  async def set_highlighted_sample(self, highlighted: int | None) -> None:
76
75
  sample_info = self.query_one(SampleInfo)
76
+ sample_vnc = self.query_one(SampleVNC)
77
77
  transcript_view = self.query_one(TranscriptView)
78
78
  sample_toolbar = self.query_one(SampleToolbar)
79
79
  if highlighted is not None:
@@ -83,12 +83,14 @@ class SamplesView(Widget):
83
83
  transcript_view.display = True
84
84
  sample_toolbar.display = True
85
85
  await sample_info.sync_sample(sample)
86
+ await sample_vnc.sync_sample(sample)
86
87
  await transcript_view.sync_sample(sample)
87
88
  await sample_toolbar.sync_sample(sample)
88
89
  return
89
90
 
90
91
  # otherwise hide ui
91
92
  sample_info.display = False
93
+ sample_vnc.display = False
92
94
  transcript_view.display = False
93
95
  sample_toolbar.display = False
94
96
 
@@ -182,10 +184,59 @@ class SamplesList(OptionList):
182
184
  return None
183
185
 
184
186
 
185
- class SampleInfo(Horizontal):
187
+ class SampleVNC(Horizontal):
188
+ DEFAULT_CSS = """
189
+ SampleVNC {
190
+ layout: grid;
191
+ grid-size: 2 1;
192
+ grid-columns: auto 1fr;
193
+ }
194
+ SampleVNC Static {
195
+ color: $secondary;
196
+ }
197
+ SampleVNC Link {
198
+ color: $accent;
199
+ }
200
+ """
201
+
202
+ def __init__(self) -> None:
203
+ super().__init__()
204
+ self._sample: ActiveSample | None = None
205
+
206
+ def compose(self) -> ComposeResult:
207
+ yield Static("VNC: ")
208
+ yield Link("")
209
+
210
+ async def sync_sample(self, sample: ActiveSample) -> None:
211
+ if sample == self._sample:
212
+ return
213
+
214
+ # default to hidden (show if we find a vnc connection)
215
+ self.display = False
216
+
217
+ # is there a vnc connection? if so populate
218
+ for connection in [c for c in sample.sandboxes.values() if c.ports]:
219
+ for port in connection.ports or []:
220
+ service = get_service_by_port(port.container_port, port.protocol)
221
+ if service == "noVNC" and port.mappings:
222
+ host_mappings = port.mappings
223
+ link = self.query_one(Link)
224
+ vnc_url = get_url(host_mappings[0].host_port, service)
225
+ if vnc_url:
226
+ link.text = vnc_url
227
+ link.url = link.text
228
+ self.display = True
229
+ break
230
+
231
+
232
+ class SampleInfo(Vertical):
186
233
  DEFAULT_CSS = """
187
234
  SampleInfo {
188
235
  color: $text-muted;
236
+ layout: grid;
237
+ grid-size: 1 2;
238
+ grid-rows: auto 1;
239
+ grid-gutter: 1;
189
240
  }
190
241
  SampleInfo Collapsible {
191
242
  padding: 0;
@@ -218,11 +269,13 @@ class SampleInfo(Horizontal):
218
269
  def __init__(self) -> None:
219
270
  super().__init__()
220
271
  self._sample: ActiveSample | None = None
272
+ self._sandbox_count: int | None = None
221
273
 
222
274
  def compose(self) -> ComposeResult:
223
275
  with Collapsible(title=""):
224
276
  yield SampleLimits()
225
277
  yield SandboxesView()
278
+ yield SampleVNC()
226
279
 
227
280
  async def sync_sample(self, sample: ActiveSample | None) -> None:
228
281
  if sample is None:
@@ -233,12 +286,14 @@ class SampleInfo(Horizontal):
233
286
  limits = self.query_one(SampleLimits)
234
287
  await limits.sync_sample(sample)
235
288
 
289
+ new_sandbox_count = len(sample.sandboxes)
236
290
  # bail if we've already processed this sample
237
- if self._sample == sample:
291
+ if self._sample == sample and self._sandbox_count == new_sandbox_count:
238
292
  return
239
293
 
240
294
  # set sample
241
295
  self._sample = sample
296
+ self._sandbox_count = new_sandbox_count
242
297
 
243
298
  # update UI
244
299
  self.display = True
@@ -246,6 +301,7 @@ class SampleInfo(Horizontal):
246
301
  self.query_one(Collapsible).title = title
247
302
  sandboxes = self.query_one(SandboxesView)
248
303
  await sandboxes.sync_sample(sample)
304
+ await self.query_one(SampleVNC).sync_sample(sample)
249
305
 
250
306
 
251
307
  class SampleLimits(Widget):
@@ -295,6 +351,9 @@ class SandboxesView(Vertical):
295
351
  background: transparent;
296
352
  height: auto;
297
353
  }
354
+ #sandboxes-list {
355
+ height: auto;
356
+ }
298
357
  SandboxesView Static {
299
358
  background: transparent;
300
359
  }
@@ -312,16 +371,24 @@ class SandboxesView(Vertical):
312
371
 
313
372
  async def sync_sample(self, sample: ActiveSample) -> None:
314
373
  if len(sample.sandboxes) > 0:
374
+ multiple_sandboxes = len(sample.sandboxes) > 1
315
375
  self.display = True
316
376
  sandboxes_caption = cast(Static, self.query_one("#sandboxes-caption"))
317
- sandboxes_caption.update("[bold]sandbox containers:[/bold]")
377
+ sandboxes_caption.update(
378
+ f"[bold]sandbox container{'s' if multiple_sandboxes else ''}:[/bold]"
379
+ )
318
380
 
319
381
  sandboxes_list = self.query_one("#sandboxes-list")
320
382
  await sandboxes_list.remove_children()
383
+
321
384
  await sandboxes_list.mount_all(
322
- [Static(sandbox.command) for sandbox in sample.sandboxes.values()]
385
+ [
386
+ SandboxView(connection, name if multiple_sandboxes else None)
387
+ for name, connection in sample.sandboxes.items()
388
+ ]
323
389
  )
324
- sandboxes_list.mount(
390
+
391
+ await sandboxes_list.mount(
325
392
  Static(
326
393
  "[italic]Hold down Alt (or Option) to select text for copying[/italic]",
327
394
  classes="clipboard-message",
@@ -346,7 +413,7 @@ class SampleToolbar(Horizontal):
346
413
  grid-columns: auto auto 1fr auto auto;
347
414
  }}
348
415
  SampleToolbar #{STATUS_GROUP} {{
349
- min-width: 20;
416
+ width: 22;
350
417
  }}
351
418
  SampleToolbar Button {{
352
419
  margin-bottom: 1;