inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -2
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +78 -11
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/score.py +1 -0
  13. inspect_ai/_eval/task/results.py +50 -22
  14. inspect_ai/_eval/task/run.py +41 -7
  15. inspect_ai/_eval/task/sandbox.py +10 -5
  16. inspect_ai/_util/constants.py +1 -0
  17. inspect_ai/_util/port_names.py +61 -0
  18. inspect_ai/_util/text.py +23 -0
  19. inspect_ai/_view/www/App.css +31 -1
  20. inspect_ai/_view/www/dist/assets/index.css +31 -1
  21. inspect_ai/_view/www/dist/assets/index.js +25344 -1849
  22. inspect_ai/_view/www/log-schema.json +32 -2
  23. inspect_ai/_view/www/package.json +2 -0
  24. inspect_ai/_view/www/src/App.mjs +8 -10
  25. inspect_ai/_view/www/src/Types.mjs +0 -1
  26. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  27. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  28. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  29. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  30. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  31. inspect_ai/_view/www/src/index.js +75 -2
  32. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  33. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  34. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  35. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  36. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  37. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  38. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
  39. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  40. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  41. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  42. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  43. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  44. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  45. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  46. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  47. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  48. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  49. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  50. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  51. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  52. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  53. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  54. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  55. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  56. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  57. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  58. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  59. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  60. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  61. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  62. inspect_ai/_view/www/vite.config.js +7 -0
  63. inspect_ai/_view/www/yarn.lock +116 -0
  64. inspect_ai/approval/_human/__init__.py +0 -0
  65. inspect_ai/approval/_policy.py +12 -6
  66. inspect_ai/log/_log.py +1 -1
  67. inspect_ai/log/_samples.py +16 -0
  68. inspect_ai/log/_transcript.py +4 -1
  69. inspect_ai/model/_call_tools.py +4 -0
  70. inspect_ai/model/_conversation.py +20 -8
  71. inspect_ai/model/_generate_config.py +10 -4
  72. inspect_ai/model/_model.py +117 -18
  73. inspect_ai/model/_model_output.py +7 -2
  74. inspect_ai/model/_providers/anthropic.py +100 -44
  75. inspect_ai/model/_providers/azureai.py +20 -20
  76. inspect_ai/model/_providers/bedrock.py +37 -40
  77. inspect_ai/model/_providers/google.py +46 -54
  78. inspect_ai/model/_providers/mistral.py +11 -11
  79. inspect_ai/model/_providers/openai.py +15 -16
  80. inspect_ai/model/_providers/openai_o1.py +9 -8
  81. inspect_ai/model/_providers/providers.py +1 -1
  82. inspect_ai/model/_providers/together.py +8 -8
  83. inspect_ai/model/_providers/vertex.py +1 -4
  84. inspect_ai/scorer/_reducer/reducer.py +1 -1
  85. inspect_ai/scorer/_scorer.py +2 -2
  86. inspect_ai/solver/__init__.py +2 -5
  87. inspect_ai/solver/_prompt.py +35 -5
  88. inspect_ai/solver/_task_state.py +80 -38
  89. inspect_ai/tool/__init__.py +2 -0
  90. inspect_ai/tool/_tool.py +12 -1
  91. inspect_ai/tool/_tool_call.py +10 -0
  92. inspect_ai/tool/_tool_def.py +16 -5
  93. inspect_ai/tool/_tool_with.py +21 -4
  94. inspect_ai/tool/beta/__init__.py +5 -0
  95. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  96. inspect_ai/tool/beta/_computer/_common.py +133 -0
  97. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  98. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  99. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  100. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  101. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  102. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  103. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  104. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  105. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  106. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  107. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  108. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  109. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  110. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  111. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  112. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  113. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  114. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  115. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  116. inspect_ai/util/__init__.py +2 -0
  117. inspect_ai/util/_limit.py +26 -0
  118. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  119. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  120. inspect_ai/util/_sandbox/environment.py +14 -0
  121. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  122. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
  123. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  124. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  125. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  126. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  127. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/common.py CHANGED
@@ -2,6 +2,7 @@ import functools
2
2
  from typing import Any, Callable, Literal, cast
3
3
 
4
4
  import click
5
+ import rich
5
6
  from typing_extensions import TypedDict
6
7
 
7
8
  from inspect_ai._util.constants import (
@@ -105,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
105
106
  def process_common_options(options: CommonOptions) -> None:
106
107
  # propagate display
107
108
  if options["no_ansi"]:
108
- display = "plain"
109
+ display = "rich"
110
+ rich.reconfigure(no_color=True)
109
111
  else:
110
112
  display = options["display"].lower().strip()
111
113
  init_display_type(display)
inspect_ai/_cli/eval.py CHANGED
@@ -348,13 +348,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
348
348
  "--logprobs",
349
349
  type=bool,
350
350
  is_flag=True,
351
- help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
351
+ help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
352
352
  envvar="INSPECT_EVAL_LOGPROBS",
353
353
  )
354
354
  @click.option(
355
355
  "--top-logprobs",
356
356
  type=int,
357
- help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only.",
357
+ help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
358
358
  envvar="INSPECT_EVAL_TOP_LOGPROBS",
359
359
  )
360
360
  @click.option(
@@ -365,6 +365,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
365
365
  help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
366
366
  envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
367
367
  )
368
+ @click.option(
369
+ "--internal-tools/--no-internal-tools",
370
+ type=bool,
371
+ is_flag=True,
372
+ default=True,
373
+ help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
374
+ envvar="INSPECT_EVAL_INTERNAL_TOOLS",
375
+ )
368
376
  @click.option(
369
377
  "--max-tool-output",
370
378
  type=int,
@@ -439,6 +447,7 @@ def eval_command(
439
447
  logprobs: bool | None,
440
448
  top_logprobs: int | None,
441
449
  parallel_tool_calls: bool | None,
450
+ internal_tools: bool | None,
442
451
  max_tool_output: int | None,
443
452
  cache_prompt: str | None,
444
453
  reasoning_effort: str | None,
@@ -598,6 +607,7 @@ def eval_set_command(
598
607
  logprobs: bool | None,
599
608
  top_logprobs: int | None,
600
609
  parallel_tool_calls: bool | None,
610
+ internal_tools: bool | None,
601
611
  max_tool_output: int | None,
602
612
  cache_prompt: str | None,
603
613
  reasoning_effort: str | None,
@@ -836,6 +846,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
836
846
  if key == "parallel_tool_calls":
837
847
  if value is not False:
838
848
  value = None
849
+ if key == "internal_tools":
850
+ if value is not False:
851
+ value = None
839
852
  config[key] = value # type: ignore
840
853
  return config
841
854
 
@@ -5,6 +5,7 @@ import rich
5
5
 
6
6
  from inspect_ai.util._display import display_type
7
7
 
8
+ from ..plain.display import PlainDisplay
8
9
  from ..rich.display import RichDisplay
9
10
  from ..textual.display import TextualDisplay
10
11
  from .display import Display, TaskScreen
@@ -13,7 +14,9 @@ from .display import Display, TaskScreen
13
14
  def display() -> Display:
14
15
  global _active_display
15
16
  if _active_display is None:
16
- if (
17
+ if display_type() == "plain":
18
+ _active_display = PlainDisplay()
19
+ elif (
17
20
  display_type() == "full"
18
21
  and sys.stdout.isatty()
19
22
  and not rich.get_console().is_jupyter
@@ -13,14 +13,14 @@ def task_config(
13
13
  value = task_args[key]
14
14
  if is_registry_dict(value):
15
15
  task_args[key] = value["name"]
16
- config = task_args | dict(profile.eval_config.model_dump(exclude_none=True))
16
+ config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
17
17
  if generate_config:
18
- config = config | dict(profile.generate_config.model_dump(exclude_none=True))
18
+ config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
19
19
  if profile.tags:
20
20
  config["tags"] = ",".join(profile.tags)
21
21
  config_print: list[str] = []
22
22
  for name, value in config.items():
23
- if name == "approval":
23
+ if name == "approval" and isinstance(value, dict):
24
24
  config_print.append(
25
25
  f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
26
26
  )
@@ -50,9 +50,13 @@ def task_panel(
50
50
  table.add_row(subtitle_table)
51
51
 
52
52
  # main progress and task info
53
- table.add_row()
54
- table.add_row(body)
55
- table.add_row()
53
+ if body:
54
+ table.add_row()
55
+ table.add_row(body)
56
+
57
+ # spacing if there is more content
58
+ if footer or log_location:
59
+ table.add_row()
56
60
 
57
61
  # footer if specified
58
62
  if footer:
File without changes
@@ -0,0 +1,203 @@
1
+ import asyncio
2
+ import contextlib
3
+ from typing import Any, AsyncIterator, Coroutine, Iterator
4
+
5
+ import rich
6
+
7
+ from inspect_ai._display.core.rich import rich_initialise
8
+ from inspect_ai._util.text import truncate
9
+ from inspect_ai._util.throttle import throttle
10
+
11
+ from ...util._concurrency import concurrency_status
12
+ from ..core.config import task_config
13
+ from ..core.display import (
14
+ TR,
15
+ Display,
16
+ Progress,
17
+ TaskDisplay,
18
+ TaskDisplayMetric,
19
+ TaskProfile,
20
+ TaskResult,
21
+ TaskScreen,
22
+ TaskSpec,
23
+ TaskWithResult,
24
+ )
25
+ from ..core.footer import task_http_rate_limits
26
+ from ..core.panel import task_panel, task_targets
27
+ from ..core.results import task_metric, tasks_results
28
+
29
+
30
class PlainDisplay(Display):
    """Display that reports task progress as plainly printed output.

    A rich panel is printed when each task starts and the combined results
    are printed when the task screen exits; in between, per-task status
    lines are produced by ``PlainTaskDisplay``.
    """

    def __init__(self) -> None:
        self.total_tasks: int = 0
        self.tasks: list[TaskWithResult] = []
        self.parallel = False
        # task_screen() normally computes these, but give them defaults so
        # that task() does not raise AttributeError if it is entered first.
        self.multiple_task_names: bool = False
        self.multiple_model_names: bool = False
        rich_initialise()

    def print(self, message: str) -> None:
        """Write ``message`` using the builtin ``print``."""
        print(message)

    @contextlib.contextmanager
    def progress(self, total: int) -> Iterator[Progress]:
        """Yield a ``PlainProgress`` counter for ``total`` steps."""
        yield PlainProgress(total)

    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
        """Run the ``main`` coroutine to completion with ``asyncio.run``."""
        return asyncio.run(main)

    @contextlib.contextmanager
    def suspend_task_app(self) -> Iterator[None]:
        """No-op context manager (this class has nothing to suspend)."""
        yield

    @contextlib.asynccontextmanager
    async def task_screen(
        self, tasks: list[TaskSpec], parallel: bool
    ) -> AsyncIterator[TaskScreen]:
        """Reset per-run state, yield a ``TaskScreen``, then print results.

        Args:
            tasks: Specs of the tasks about to run; used to count tasks and
                to decide whether task/model names are needed to tell the
                status lines apart.
            parallel: When true a "Running N tasks..." header is printed.
        """
        self.total_tasks = len(tasks)
        self.multiple_task_names = len({task.name for task in tasks}) > 1
        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
        self.tasks = []
        self.parallel = parallel
        try:
            # Print header for task(s)
            if parallel:
                print(f"Running {self.total_tasks} tasks...")
            yield TaskScreen()
        finally:
            # Print final results (also runs when the body raised)
            if self.tasks:
                self._print_results()

    @contextlib.contextmanager
    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
        """Print the task's header panel and yield its ``PlainTaskDisplay``.

        Args:
            profile: Profile of the task being started.
        """
        # Print initial task information using a rich panel
        panel = task_panel(
            profile=profile,
            show_model=True,
            body="",  # Empty body since we haven't started yet
            subtitle=(task_config(profile), task_targets(profile)),
            footer=None,
            log_location=None,
        )
        rich.print(panel)

        # Register the task (result is filled in on completion) and hand
        # back the display that will print its status lines
        task = TaskWithResult(profile, None)
        self.tasks.append(task)
        yield PlainTaskDisplay(
            task,
            show_task_names=self.multiple_task_names,
            show_model_names=self.multiple_model_names,
        )

    def _print_results(self) -> None:
        """Print final results using rich panels"""
        panels = tasks_results(self.tasks)
        rich.print(panels)
96
+
97
+
98
class PlainProgress(Progress):
    """Silent step counter: tracks ``current`` out of ``total`` steps."""

    def __init__(self, total: int):
        self.current = 0
        self.total = total

    def update(self, n: int = 1) -> None:
        """Advance the counter by ``n`` steps."""
        # Nothing is printed from here; PlainTaskDisplay reads the counter
        # and does the printing.
        self.current = self.current + n

    def complete(self) -> None:
        """Jump the counter to ``total``."""
        self.current = self.total
109
+
110
+
111
class PlainTaskDisplay(TaskDisplay):
    """Per-task display that prints one status line per progress change.

    A line is printed only when the integer step percentage differs from
    the previously printed one, so output stays bounded for long tasks.
    """

    def __init__(
        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
    ):
        """Create the display.

        Args:
            task: Task whose ``result`` is filled in by ``complete()``.
            show_task_names: Prefix status lines with the task name.
            show_model_names: Prefix status lines with the model name.
        """
        self.task = task
        self.show_task_names = show_task_names
        self.show_model_names = show_model_names
        self.progress_display: PlainProgress | None = None
        self.samples_complete = 0
        self.samples_total = 0
        self.current_metrics: list[TaskDisplayMetric] | None = None
        self.last_progress = 0  # Track last progress percentage

    @contextlib.contextmanager
    def progress(self) -> Iterator[Progress]:
        """Create and yield the step counter for this task."""
        self.progress_display = PlainProgress(self.task.profile.steps)
        yield self.progress_display

    # NOTE(review): throttle(1) presumably limits calls to one per second;
    # confirm against inspect_ai._util.throttle.
    @throttle(1)
    def _print_status_throttled(self) -> None:
        self._print_status()

    def _print_status(self) -> None:
        """Print status updates on new lines when there's meaningful progress"""
        if not self.progress_display:
            return

        # Calculate current progress percentage. A task with zero steps is
        # reported as 0% rather than raising ZeroDivisionError.
        total_steps = self.progress_display.total
        current_progress = (
            int(self.progress_display.current / total_steps * 100)
            if total_steps
            else 0
        )

        # Only print on percentage changes to avoid too much output
        if current_progress != self.last_progress:
            status_parts: list[str] = []

            # if this is parallel print task and model to distinguish (limit both to 12 chars)
            MAX_NAME_WIDTH = 12
            if self.show_task_names:
                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
            if self.show_model_names:
                status_parts.append(
                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
                )

            # Add step progress
            status_parts.append(
                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
            )

            # Add sample progress
            status_parts.append(
                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
            )

            # Add metrics
            if self.current_metrics:
                metric_str = task_metric(self.current_metrics)
                status_parts.append(metric_str)

            # Add resource usage
            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
            # the rich formatting added in the ``task_dict`` call
            resources = ", ".join(
                f"{model}: {resource[0]:2d}/{resource[1]:2d}"
                for model, resource in concurrency_status().items()
            )
            # Skip the segment when nothing is reported, so the joined line
            # has no empty " |  | " field.
            if resources:
                status_parts.append(resources)

            # Add rate limits
            rate_limits = task_http_rate_limits()
            if rate_limits:
                status_parts.append(rate_limits)

            # Print on new line
            print(" | ".join(status_parts))

            self.last_progress = current_progress

    def sample_complete(self, complete: int, total: int) -> None:
        """Record sample counts and request a (throttled) status line."""
        self.samples_complete = complete
        self.samples_total = total
        self._print_status_throttled()

    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
        """Record the latest metrics and request a (throttled) status line."""
        self.current_metrics = metrics
        self._print_status_throttled()

    def complete(self, result: TaskResult) -> None:
        """Store the task result and print a final, unthrottled status line."""
        self.task.result = result
        self._print_status()
@@ -129,11 +129,6 @@ class RichDisplay(Display):
129
129
  @override
130
130
  @contextlib.contextmanager
131
131
  def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
132
- # if there is no ansi display than all of the below will
133
- # be a no-op, so we print a simple text message for the task
134
- if display_type() == "plain":
135
- rich.get_console().print(task_no_ansi(profile))
136
-
137
132
  # for typechecker
138
133
  if self.tasks is None:
139
134
  self.tasks = []
@@ -0,0 +1,110 @@
1
+ from typing import Literal
2
+
3
+ from textual.app import ComposeResult
4
+ from textual.containers import HorizontalScroll
5
+ from textual.widget import Widget
6
+ from textual.widgets import Link, Static
7
+
8
+ from inspect_ai._util.port_names import get_service_by_port
9
+ from inspect_ai.util._sandbox.environment import PortMapping
10
+
11
+
12
class PortMappingsView(HorizontalScroll):
    """Four-column grid listing a sandbox's port mappings.

    Columns are: service, sandbox (container) port, client (host) port and
    an endpoint link when a URL can be derived for the service.
    """

    DEFAULT_CSS = """
    PortMappingsView {
        layout: grid;
        height: auto;
        grid-size: 4 3;
        grid-columns: auto auto auto auto;
        grid-gutter: 0 1;
    }
    """

    def __init__(self, ports: list[PortMapping] | None) -> None:
        """Store the port mappings to render (``None`` or empty renders nothing)."""
        super().__init__()
        self.ports = ports

    def compose(self) -> ComposeResult:
        """Yield the header cells followed by four cells per host mapping."""
        if not self.ports:
            return

        # header row
        for heading in ("service", "sandbox", "client", "endpoint"):
            yield Static(heading)

        # data rows: reuse the module-level helper instead of repeating its
        # service lookup + row construction here
        yield from widgets_for_port_mappings(self.ports)
45
+
46
+
47
def widgets_for_port_mappings(
    port_mappings: list[PortMapping] | None,
) -> list[Widget]:
    """Build the flat list of row widgets for a set of port mappings.

    Args:
        port_mappings: Mappings to render, or ``None``.

    Returns:
        The widgets produced by ``widgets_from_port_mapping`` for every
        mapping, concatenated in order; an empty list for ``None``.
    """
    if port_mappings is None:
        return []

    # Resolve the service name for every mapping first ...
    mappings_with_service = [
        (port_mapping, get_service_by_port(port_mapping.container_port, port_mapping.protocol))
        for port_mapping in port_mappings
    ]

    # ... then expand each (mapping, service) pair into its row widgets.
    widgets: list[Widget] = []
    for pair in mappings_with_service:
        widgets.extend(widgets_from_port_mapping(pair))
    return widgets
60
+
61
+
62
def widgets_from_port_mapping(
    mapping_service_tuple: tuple[PortMapping, str | None],
) -> list[Widget]:
    """Build the row widgets for one port mapping.

    Args:
        mapping_service_tuple: The port mapping paired with its service
            name (or ``None`` when no service name is available).

    Returns:
        The widgets from ``get_row_widgets`` for each host mapping of the
        port, concatenated in order.
    """
    port_mapping, service = mapping_service_tuple

    row_widgets: list[Widget] = []
    for host_mapping in port_mapping.mappings:
        row = get_row_widgets(
            port_mapping.protocol,
            host_mapping.host_port,
            port_mapping.container_port,
            service,
        )
        row_widgets.extend(row)
    return row_widgets
76
+
77
+
78
def get_row_widgets(
    protocol: Literal["tcp", "udp"],
    host_port: int,
    container_port: int,
    service: str | None,
) -> list[Widget]:
    """Build the four cells of one port-mapping row.

    Args:
        protocol: Transport protocol; shown in the first cell when there is
            no service name.
        host_port: Port on the host (the "client" column).
        container_port: Port inside the sandbox (the "sandbox" column).
        service: Service name, or ``None``.

    Returns:
        ``[service-or-protocol, container port, host port, endpoint]`` where
        the endpoint is a ``Link`` when ``get_url`` yields a URL and an
        empty ``Static`` otherwise.
    """
    url = get_url(
        host_port,
        service,
    )
    return [
        Static(service if service is not None else protocol),
        Static(str(container_port)),
        Static(str(host_port)),
        # An empty cell keeps the grid aligned when there is no URL; the
        # previous "asdf" placeholder was visible to users.
        Link(url) if url is not None else Static(""),
    ]
94
+
95
+
96
def get_url(
    host_port: int,
    service: str | None,
) -> str | None:
    """Return a localhost URL for a known service, or ``None``.

    Args:
        host_port: Host port the service is published on.
        service: Service name, or ``None``.

    Returns:
        * ``noVNC``: an ``http://`` URL with view-only/autoconnect query
          parameters.
        * names starting with ``HTTPS``: an ``https://`` URL.
        * other names starting with ``HTTP``: an ``http://`` URL.
        * names starting with ``VNC``: a ``vnc://`` URL.
        * anything else (including ``None``): ``None``.
    """
    if service is None:
        return None

    if service == "noVNC":
        return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"

    # Check HTTPS before HTTP: "HTTPS..." also starts with "HTTP", and a
    # plain HTTP service must not be linked with the https scheme.
    if service.startswith("HTTPS"):
        return f"https://localhost:{host_port}"

    if service.startswith("HTTP"):
        return f"http://localhost:{host_port}"

    if service.startswith("VNC"):
        return f"vnc://localhost:{host_port}"

    return None
@@ -5,29 +5,28 @@ from rich.console import RenderableType
5
5
  from rich.table import Table
6
6
  from rich.text import Text
7
7
  from textual.app import ComposeResult
8
- from textual.containers import (
9
- Horizontal,
10
- HorizontalGroup,
11
- Vertical,
12
- VerticalGroup,
13
- )
8
+ from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalGroup
14
9
  from textual.reactive import reactive
15
10
  from textual.widget import Widget
16
11
  from textual.widgets import (
17
12
  Button,
18
13
  Collapsible,
14
+ Link,
19
15
  LoadingIndicator,
20
16
  OptionList,
21
17
  Static,
22
18
  )
23
19
  from textual.widgets.option_list import Option, Separator
24
20
 
21
+ from inspect_ai._display.textual.widgets.port_mappings import get_url
25
22
  from inspect_ai._util.format import format_progress_time
23
+ from inspect_ai._util.port_names import get_service_by_port
26
24
  from inspect_ai._util.registry import registry_unqualified_name
27
25
  from inspect_ai.log._samples import ActiveSample
28
26
  from inspect_ai.log._transcript import ToolEvent
29
27
 
30
28
  from .clock import Clock
29
+ from .sandbox import SandboxView
31
30
  from .transcript import TranscriptView
32
31
 
33
32
 
@@ -74,6 +73,7 @@ class SamplesView(Widget):
74
73
 
75
74
  async def set_highlighted_sample(self, highlighted: int | None) -> None:
76
75
  sample_info = self.query_one(SampleInfo)
76
+ sample_vnc = self.query_one(SampleVNC)
77
77
  transcript_view = self.query_one(TranscriptView)
78
78
  sample_toolbar = self.query_one(SampleToolbar)
79
79
  if highlighted is not None:
@@ -83,12 +83,14 @@ class SamplesView(Widget):
83
83
  transcript_view.display = True
84
84
  sample_toolbar.display = True
85
85
  await sample_info.sync_sample(sample)
86
+ await sample_vnc.sync_sample(sample)
86
87
  await transcript_view.sync_sample(sample)
87
88
  await sample_toolbar.sync_sample(sample)
88
89
  return
89
90
 
90
91
  # otherwise hide ui
91
92
  sample_info.display = False
93
+ sample_vnc.display = False
92
94
  transcript_view.display = False
93
95
  sample_toolbar.display = False
94
96
 
@@ -182,10 +184,59 @@ class SamplesList(OptionList):
182
184
  return None
183
185
 
184
186
 
185
- class SampleInfo(Horizontal):
187
class SampleVNC(Horizontal):
    """One-line "VNC: <link>" row, shown only when a sample exposes noVNC."""

    DEFAULT_CSS = """
    SampleVNC {
        layout: grid;
        grid-size: 2 1;
        grid-columns: auto 1fr;
    }
    SampleVNC Static {
        color: $secondary;
    }
    SampleVNC Link {
        color: $accent;
    }
    """

    def __init__(self) -> None:
        super().__init__()
        # NOTE(review): nothing in this class assigns _sample after
        # construction, so the equality guard in sync_sample() only fires
        # if a caller sets it. Confirm whether that is intended before
        # caching here, since sandboxes can be added to a sample later.
        self._sample: ActiveSample | None = None

    def compose(self) -> ComposeResult:
        """Yield the caption and an initially empty link."""
        yield Static("VNC: ")
        yield Link("")

    async def sync_sample(self, sample: ActiveSample) -> None:
        """Point the link at the sample's first noVNC port, or hide the row.

        Args:
            sample: Active sample whose sandbox connections are scanned.
        """
        if sample == self._sample:
            return

        # default to hidden (show if we find a vnc connection)
        self.display = False

        # is there a vnc connection? if so populate
        for connection in sample.sandboxes.values():
            for port in connection.ports or []:
                service = get_service_by_port(port.container_port, port.protocol)
                if service != "noVNC" or not port.mappings:
                    continue
                vnc_url = get_url(port.mappings[0].host_port, service)
                if vnc_url:
                    link = self.query_one(Link)
                    link.text = vnc_url
                    link.url = link.text
                    self.display = True
                    # Stop at the first match. A bare ``break`` would only
                    # leave the inner loop and let a later sandbox
                    # overwrite the link.
                    return
230
+
231
+
232
+ class SampleInfo(Vertical):
186
233
  DEFAULT_CSS = """
187
234
  SampleInfo {
188
235
  color: $text-muted;
236
+ layout: grid;
237
+ grid-size: 1 2;
238
+ grid-rows: auto 1;
239
+ grid-gutter: 1;
189
240
  }
190
241
  SampleInfo Collapsible {
191
242
  padding: 0;
@@ -218,11 +269,13 @@ class SampleInfo(Horizontal):
218
269
  def __init__(self) -> None:
219
270
  super().__init__()
220
271
  self._sample: ActiveSample | None = None
272
+ self._sandbox_count: int | None = None
221
273
 
222
274
  def compose(self) -> ComposeResult:
223
275
  with Collapsible(title=""):
224
276
  yield SampleLimits()
225
277
  yield SandboxesView()
278
+ yield SampleVNC()
226
279
 
227
280
  async def sync_sample(self, sample: ActiveSample | None) -> None:
228
281
  if sample is None:
@@ -233,12 +286,14 @@ class SampleInfo(Horizontal):
233
286
  limits = self.query_one(SampleLimits)
234
287
  await limits.sync_sample(sample)
235
288
 
289
+ new_sandbox_count = len(sample.sandboxes)
236
290
  # bail if we've already processed this sample
237
- if self._sample == sample:
291
+ if self._sample == sample and self._sandbox_count == new_sandbox_count:
238
292
  return
239
293
 
240
294
  # set sample
241
295
  self._sample = sample
296
+ self._sandbox_count = new_sandbox_count
242
297
 
243
298
  # update UI
244
299
  self.display = True
@@ -246,6 +301,7 @@ class SampleInfo(Horizontal):
246
301
  self.query_one(Collapsible).title = title
247
302
  sandboxes = self.query_one(SandboxesView)
248
303
  await sandboxes.sync_sample(sample)
304
+ await self.query_one(SampleVNC).sync_sample(sample)
249
305
 
250
306
 
251
307
  class SampleLimits(Widget):
@@ -295,6 +351,9 @@ class SandboxesView(Vertical):
295
351
  background: transparent;
296
352
  height: auto;
297
353
  }
354
+ #sandboxes-list {
355
+ height: auto;
356
+ }
298
357
  SandboxesView Static {
299
358
  background: transparent;
300
359
  }
@@ -312,16 +371,24 @@ class SandboxesView(Vertical):
312
371
 
313
372
  async def sync_sample(self, sample: ActiveSample) -> None:
314
373
  if len(sample.sandboxes) > 0:
374
+ multiple_sandboxes = len(sample.sandboxes) > 1
315
375
  self.display = True
316
376
  sandboxes_caption = cast(Static, self.query_one("#sandboxes-caption"))
317
- sandboxes_caption.update("[bold]sandbox containers:[/bold]")
377
+ sandboxes_caption.update(
378
+ f"[bold]sandbox container{'s' if multiple_sandboxes else ''}:[/bold]"
379
+ )
318
380
 
319
381
  sandboxes_list = self.query_one("#sandboxes-list")
320
382
  await sandboxes_list.remove_children()
383
+
321
384
  await sandboxes_list.mount_all(
322
- [Static(sandbox.command) for sandbox in sample.sandboxes.values()]
385
+ [
386
+ SandboxView(connection, name if multiple_sandboxes else None)
387
+ for name, connection in sample.sandboxes.items()
388
+ ]
323
389
  )
324
- sandboxes_list.mount(
390
+
391
+ await sandboxes_list.mount(
325
392
  Static(
326
393
  "[italic]Hold down Alt (or Option) to select text for copying[/italic]",
327
394
  classes="clipboard-message",
@@ -0,0 +1,37 @@
1
+ from textual.app import ComposeResult
2
+ from textual.containers import Horizontal, Vertical
3
+ from textual.widgets import Static
4
+
5
+ from inspect_ai.util._sandbox.environment import SandboxConnection
6
+
7
+ from .port_mappings import PortMappingsView
8
+
9
+
10
class SandboxView(Vertical):
    """Shows one sandbox: optional name header, command and port mappings."""

    DEFAULT_CSS = """
    .indent {
        width: 2;
    }
    .no_indent {
        width: 0;
    }
    """

    def __init__(
        self,
        connection: SandboxConnection,
        name: str | None,  # if None, no header or indent
    ) -> None:
        super().__init__()
        self.connection = connection
        self.sandbox_name = name

    def compose(self) -> ComposeResult:
        """Yield the header (when named), then the indented connection details."""
        header = self.sandbox_name
        if header:
            yield Static(header)

        # A named sandbox gets a spacer with the "indent" class before its
        # details; an unnamed one gets the "no_indent" class.
        spacer_class = "indent" if header else "no_indent"
        with Horizontal():
            yield Static("", classes=spacer_class)
            with Vertical():
                yield Static(self.connection.command)
                ports = self.connection.ports
                if ports:
                    yield PortMappingsView(ports)