inspect-ai 0.3.54__py3-none-any.whl → 0.3.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/progress.py +0 -6
  8. inspect_ai/_display/core/rich.py +1 -1
  9. inspect_ai/_display/rich/display.py +2 -2
  10. inspect_ai/_display/textual/app.py +15 -17
  11. inspect_ai/_display/textual/widgets/clock.py +3 -3
  12. inspect_ai/_display/textual/widgets/samples.py +6 -13
  13. inspect_ai/_eval/context.py +9 -1
  14. inspect_ai/_eval/score.py +4 -10
  15. inspect_ai/_eval/task/log.py +2 -1
  16. inspect_ai/_eval/task/results.py +5 -4
  17. inspect_ai/_eval/task/run.py +6 -12
  18. inspect_ai/_eval/task/task.py +10 -0
  19. inspect_ai/_util/ansi.py +31 -0
  20. inspect_ai/_util/format.py +7 -0
  21. inspect_ai/_util/logger.py +12 -12
  22. inspect_ai/_util/throttle.py +10 -1
  23. inspect_ai/_util/trace.py +43 -47
  24. inspect_ai/_util/transcript.py +4 -0
  25. inspect_ai/_util/vscode.py +51 -0
  26. inspect_ai/_view/notify.py +2 -1
  27. inspect_ai/_view/www/App.css +22 -1
  28. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  29. inspect_ai/_view/www/dist/assets/index.js +29622 -24424
  30. inspect_ai/_view/www/log-schema.json +138 -90
  31. inspect_ai/_view/www/package.json +1 -0
  32. inspect_ai/_view/www/src/App.mjs +1 -0
  33. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  34. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  35. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  36. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  37. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  38. inspect_ai/_view/www/src/components/Tools.mjs +11 -3
  39. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  40. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  41. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  42. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  43. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  44. inspect_ai/_view/www/src/types/log.d.ts +26 -12
  45. inspect_ai/_view/www/yarn.lock +44 -0
  46. inspect_ai/approval/_apply.py +4 -0
  47. inspect_ai/approval/_human/panel.py +5 -8
  48. inspect_ai/dataset/_dataset.py +51 -10
  49. inspect_ai/dataset/_util.py +31 -3
  50. inspect_ai/log/__init__.py +2 -0
  51. inspect_ai/log/_log.py +5 -2
  52. inspect_ai/model/_cache.py +1 -1
  53. inspect_ai/model/_call_tools.py +4 -2
  54. inspect_ai/model/_chat_message.py +3 -0
  55. inspect_ai/model/_model.py +42 -1
  56. inspect_ai/model/_providers/anthropic.py +4 -0
  57. inspect_ai/model/_providers/openai.py +11 -1
  58. inspect_ai/model/_render.py +9 -2
  59. inspect_ai/scorer/_metric.py +12 -1
  60. inspect_ai/solver/__init__.py +2 -0
  61. inspect_ai/solver/_human_agent/agent.py +83 -0
  62. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  63. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  64. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  65. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  66. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  67. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  68. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  69. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  70. inspect_ai/solver/_human_agent/install.py +222 -0
  71. inspect_ai/solver/_human_agent/panel.py +252 -0
  72. inspect_ai/solver/_human_agent/service.py +45 -0
  73. inspect_ai/solver/_human_agent/state.py +55 -0
  74. inspect_ai/solver/_human_agent/view.py +24 -0
  75. inspect_ai/solver/_task_state.py +28 -2
  76. inspect_ai/tool/_tool.py +10 -2
  77. inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
  78. inspect_ai/util/__init__.py +8 -4
  79. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  80. inspect_ai/util/_panel.py +31 -9
  81. inspect_ai/util/_sandbox/__init__.py +0 -3
  82. inspect_ai/util/_sandbox/context.py +5 -1
  83. inspect_ai/util/_sandbox/docker/compose.py +16 -10
  84. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  85. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  86. inspect_ai/util/_sandbox/docker/util.py +2 -2
  87. inspect_ai/util/_sandbox/environment.py +6 -5
  88. inspect_ai/util/_sandbox/local.py +1 -1
  89. inspect_ai/util/_sandbox/service.py +22 -7
  90. inspect_ai/util/_store.py +5 -6
  91. inspect_ai/util/_store_model.py +110 -0
  92. inspect_ai/util/_throttle.py +32 -0
  93. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/METADATA +2 -2
  94. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/RECORD +98 -76
  95. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/LICENSE +0 -0
  96. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/WHEEL +0 -0
  97. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/entry_points.txt +0 -0
  98. {inspect_ai-0.3.54.dist-info → inspect_ai-0.3.56.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,151 @@
1
+ from argparse import Namespace
2
+ from logging import getLogger
3
+ from pathlib import PurePosixPath
4
+ from re import Pattern, compile, match
5
+ from typing import Awaitable, Callable, Literal
6
+
7
+ from pydantic import JsonValue
8
+
9
+ from inspect_ai._util.ansi import render_text
10
+ from inspect_ai.util._sandbox import sandbox
11
+
12
+ from ..install import RECORD_SESSION_DIR
13
+ from ..state import HumanAgentState
14
+ from .command import HumanAgentCommand, call_human_agent
15
+
16
+ logger = getLogger(__name__)
17
+
18
+
19
class SubmitCommand(HumanAgentCommand):
    """The `submit` command: confirm with the user, then end the task.

    The source of `cli` is extracted with `inspect.getsource` and pasted into
    the script installed in the sandbox, so that method must stay
    self-contained: it may not use `self` and may only rely on names the
    generated script provides (e.g. `call_human_agent`).
    """

    def __init__(self, record_session: bool):
        super().__init__()
        # when set, session logs are read from the sandbox at submit time
        self._record_session = record_session

    @property
    def name(self) -> str:
        return "submit"

    @property
    def description(self) -> str:
        return "Submit your final answer for the task."

    @property
    def group(self) -> Literal[1, 2, 3]:
        return 1

    @property
    def cli_args(self) -> list[HumanAgentCommand.CLIArg]:
        answer_arg = HumanAgentCommand.CLIArg(
            name="answer",
            description="Answer to submit for scoring (optional, not required for all tasks)",
        )
        return [answer_arg]

    def cli(self, args: Namespace) -> None:
        # the parsed arguments are forwarded as keyword arguments
        call_args = vars(args)

        # validation hands back an error message when the submission is rejected
        error = call_human_agent("validate", **call_args)
        if error:
            print(error)
            return

        # keep asking until we get an unambiguous yes or no
        answer = call_args.get("answer", None)
        answer_text = f" '{answer}'" if answer else ""
        prompt = f"\nDo you definitely want to end the task and submit{answer_text}?\n\nThis will disconnect you from the task environment and you won't be able to reconnect.\n\nYes (y) or No (n): "
        confirmed = False
        while not confirmed:
            response = input(prompt).lower().strip()
            if response in ["no", "n"]:
                return
            confirmed = response in ["yes", "y"]
            if not confirmed:
                print("Please enter yes or no.")

        # thank the user!
        print(
            "\nThank you for working on this task!\n\n"
            + "Your task will now be scored and you will be disconnected from this container.\n"
        )

        # hand the submission over to the service
        call_human_agent("submit", **call_args)

    def service(self, state: HumanAgentState) -> Callable[..., Awaitable[JsonValue]]:
        """Return the service handler that records the submission on `state`."""

        async def submit(
            answer: str | None, session_logs: dict[str, str] | None = None
        ) -> None:
            # `session_logs` is accepted but not used; logs are read from the sandbox
            if self._record_session:
                state.logs = await self._read_session_logs()
            state.running = False
            state.answer = answer

        return submit

    async def _read_session_logs(self) -> dict[str, str]:
        """Read every file in the session recording directory (best effort).

        Failures are logged as warnings rather than raised; files that cannot
        be read are left out of the result.
        """
        # retrieve the list of session logs (don't fail)
        sessions_dir = PurePosixPath(RECORD_SESSION_DIR)
        listing = await sandbox().exec(["ls", "-1", sessions_dir.as_posix()])
        if not listing.success:
            logger.warning(f"Error listing human agent session logs: {listing.stderr}")
            return {}

        # read each log, keyed by its file name
        logs: dict[str, str] = {}
        for filename in listing.stdout.strip().splitlines():
            log_path = (sessions_dir / filename).as_posix()
            try:
                logs[filename] = await sandbox().read_file(log_path)
            except Exception as ex:
                logger.warning(f"Error reading human agent session log: {ex}")

        return logs
112
+
113
+
114
class ValidateCommand(HumanAgentCommand):
    """Service-only command that checks a submission before it is accepted.

    The `answer` setting controls the check: a string is compiled as a regular
    expression the answer must match, a truthy non-string value only requires
    a non-empty answer, and a falsy value skips the answer check entirely.
    """

    def __init__(self, answer: bool | str) -> None:
        # initialise the base class, as SubmitCommand does
        super().__init__()
        # `compile` is `re.compile` here (imported from `re` at module level)
        self._answer = compile(answer) if isinstance(answer, str) else answer

    @property
    def name(self) -> str:
        return "validate"

    @property
    def description(self) -> str:
        return "Validate a task submission."

    @property
    def contexts(self) -> list[Literal["cli", "service"]]:
        # no "cli" context: this command gets no entry in the generated script
        return ["service"]

    def service(self, state: HumanAgentState) -> Callable[..., Awaitable[JsonValue]]:
        """Return the handler; it yields an error message, or None if valid."""

        async def validate(answer: str | None) -> str | None:
            def failed(reason: str) -> str:
                return render_text(f"[bold]FAILED:[/bold] {reason}")

            if not state.running:
                return failed("Task is stopped (use 'task start' to start)")
            if self._answer:
                # surrounding whitespace never counts as part of an answer
                answer = answer.strip() if isinstance(answer, str) else answer
                if not answer:
                    return failed(
                        "An explicit answer is required for scoring this task."
                    )
                elif isinstance(self._answer, Pattern) and not match(
                    self._answer, answer
                ):
                    return failed(
                        "Your answer was not in the required format (please review the task instructions)"
                    )
            return None  # made it through verification

        return validate
@@ -0,0 +1,222 @@
1
+ import inspect
2
+ from textwrap import dedent
3
+
4
+ from inspect_ai.util import sandbox
5
+
6
+ from .._task_state import TaskState
7
+ from .commands.command import HumanAgentCommand
8
+
9
# scratch directory used while installing (created with a relative path)
INSTALL_DIR = "human_agent_install"
# directory in the sandbox that receives the installed command script
HUMAN_AGENT_DIR = "/opt/human_agent"

# names of the files generated during installation
TASK_PY = "task.py"
INSTALL_SH = "install.sh"
BASHRC = ".bashrc"

# additional file names (not referenced by the code visible in this module)
WELCOME_FILE = "welcome.txt"
WELCOME_LOGIN_FILE = "welcome_login.txt"
INSTRUCTIONS_FILE = "instructions.txt"

# directory where recorded terminal sessions are written
RECORD_SESSION_DIR = "/var/tmp/user-sessions"
18
+
19
+
20
async def install_human_agent(
    state: TaskState, commands: list[HumanAgentCommand], record_session: bool
) -> None:
    """Install the human agent command script and shell setup in the sandbox.

    Args:
        state: Current task state (not used by the installation itself).
        commands: Commands to expose through the generated script.
        record_session: Whether the generated .bashrc records the session.

    Raises:
        RuntimeError: If any installation step after the initial check fails.
    """
    # a failing mkdir is taken to mean a previous run already installed
    created = await sandbox().exec(["mkdir", HUMAN_AGENT_DIR])
    if not created.success:
        return

    # stage everything in a scratch directory
    await checked_exec(["mkdir", "-p", INSTALL_DIR])

    # command script
    task_script = human_agent_commands(commands)
    await checked_write_file(f"{INSTALL_DIR}/{TASK_PY}", task_script, executable=True)

    # shell setup
    bashrc_text = human_agent_bashrc(commands, record_session)
    await checked_write_file(f"{INSTALL_DIR}/{BASHRC}", bashrc_text, executable=True)

    # installer: write it, run it from the scratch directory, then clean up
    installer = human_agent_install_sh()
    await checked_write_file(f"{INSTALL_DIR}/{INSTALL_SH}", installer, executable=True)
    await checked_exec(["bash", f"./{INSTALL_SH}"], cwd=INSTALL_DIR)
    await checked_exec(["rm", "-rf", INSTALL_DIR])
43
+
44
+
45
def human_agent_commands(commands: list[HumanAgentCommand]) -> str:
    """Generate the source code of the command script installed in the sandbox.

    The script consists of a fixed preamble, one handler function per command
    (copied from the command's `cli` method), an argparse parser with one
    subparser per command, and an if/elif chain that dispatches to the handler.

    Args:
        commands: Candidate commands; those without "cli" in `contexts` are
            left out of the script.

    Returns:
        Python source code for the script.
    """
    # filter out hidden commands
    commands = [command for command in commands if "cli" in command.contexts]

    # standard imports (including any dependencies that cli methods carry)
    imports = dedent("""
    import argparse
    import sys
    from argparse import Namespace
    from pathlib import Path

    sys.path.append("/var/tmp/sandbox-services/human_agent")
    from human_agent import call_human_agent

    def format_time(t):
        minutes, seconds = divmod(t, 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:.0f}:{minutes:02.0f}:{seconds:02.0f}"
    """)

    # command handler source code: each `cli` method becomes a module-level
    # function named after its command (the `self` parameter is dropped)
    command_handlers = "\n\n".join(
        dedent(
            inspect.getsource(command.cli).replace("cli(self, ", f"{command.name}(", 1)
        )
        for command in commands
    )

    # parse commands (descriptions are emitted with repr() so that quotes and
    # backslashes in them cannot break the generated source)
    command_parsers: list[str] = []
    for command in commands:
        # the trailing newline leaves a blank line after the subparser definition
        command_parsers.append(
            f'{command.name}_parser = subparsers.add_parser("{command.name}", help={command.description!r})\n'
        )
        for arg in command.cli_args:
            if arg.name.startswith("--"):
                # options are boolean flags
                extras = 'action="store_true", default=False'
            else:
                # positionals are mandatory only when marked as required
                extras = f"""nargs={1 if arg.required else '"?"'}"""
            command_parsers.append(
                f'{command.name}_parser.add_argument("{arg.name}", {extras}, help={arg.description!r})'
            )

    parse = (
        dedent("""
        parser = argparse.ArgumentParser(description="Human agent tools.")
        subparsers = parser.add_subparsers(dest="command")
        """)
        + "\n"
        + "\n".join(command_parsers)
    )

    # dispatch commands
    command_dispatchers: list[str] = []
    for i, command in enumerate(commands):
        conditional = "if" if i == 0 else "elif"
        command_dispatchers.append(
            f'{conditional} command == "{command.name}": {command.name}(args)'
        )
    command_dispatchers.append("else: parser.print_help()")

    # `command` is removed from the namespace so handlers only see their own args
    dispatch = dedent("""
    args = parser.parse_args()
    command = args.command
    delattr(args, 'command')
    """) + "\n".join(command_dispatchers)

    return "\n".join([imports, command_handlers, parse, dispatch]) + "\n"
117
+
118
+
119
def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool) -> str:
    """Build the shell snippet that the installer appends to ~/.bashrc.

    Args:
        commands: Commands whose names are offered for `task` tab completion
            (only those with "cli" in `contexts`).
        record_session: Whether to include the `script`-based session recorder.

    Returns:
        Shell source made of four sections: the interactive-terminal guard,
        the `task` alias with its completion, the optional recorder, and the
        one-time display of the task instructions.
    """
    # only run in interactive terminals
    terminal_check = dedent("""

    ### Inspect Human Agent Setup #########################################=

    # only run if shell is interactive
    case $- in
        *i*) ;;
        *) return ;;
    esac

    # only run if attached to a terminal
    if ! tty -s; then
        return
    fi
    """)

    # shell alias and completions
    cli_names = (command.name for command in commands if "cli" in command.contexts)
    command_names = " ".join(f"{cli_name}" for cli_name in cli_names)
    alias_and_completion = dedent(f"""
    # shell alias for human agent commands
    alias task='python3 {HUMAN_AGENT_DIR}/{TASK_PY}'

    # completion handler
    _task_completion() {{
        local cur
        cur="${{COMP_WORDS[COMP_CWORD]}}"
        if [ "$COMP_CWORD" -eq 1 ]; then
            local commands="{command_names}"

            # Generate completion matches
            COMPREPLY=($(compgen -W "${{commands}}" -- ${{cur}}))
        fi
    }}
    complete -F _task_completion task
    """)

    # session recording (left empty unless requested)
    recording = ""
    if record_session:
        recording = dedent(f"""
        # record human agent session transcript
        if [ -z "$SCRIPT_RUNNING" ]; then
            export SCRIPT_RUNNING=1
            LOGDIR={RECORD_SESSION_DIR}
            mkdir -p "$LOGDIR"
            TIMESTAMP=$(date +%Y%m%d_%H%M%S)
            INPUTFILE="$LOGDIR/$(whoami)_$TIMESTAMP.input"
            OUTPUTFILE="$LOGDIR/$(whoami)_$TIMESTAMP.output"
            TIMINGFILE="$LOGDIR/$(whoami)_$TIMESTAMP.timing"
            exec script -q -f -m advanced -I "$INPUTFILE" -O "$OUTPUTFILE" -T "$TIMINGFILE" -c "bash --login -i"
        fi
        """)

    # display task instructions
    show_instructions = dedent("""
    if [ -z "$INSTRUCTIONS_SHOWN" ]; then
        export INSTRUCTIONS_SHOWN=1
        task instructions > instructions.txt
        cat instructions.txt
    fi
    """).lstrip()

    # assemble the .bashrc addition
    sections = [terminal_check, alias_and_completion, recording, show_instructions]
    return "\n".join(sections)
188
+
189
+
190
def human_agent_install_sh() -> str:
    """Return the installer script that is run from the staging directory.

    The script creates the human agent directory, copies the command script
    into it and appends the generated shell setup to the user's ~/.bashrc.
    """
    # lstrip() drops the newline that follows the opening quotes so that the
    # shebang is the very first line of the script (it is ignored otherwise)
    return dedent(f"""
    #!/usr/bin/env bash

    # create installation directory
    HUMAN_AGENT="{HUMAN_AGENT_DIR}"
    mkdir -p "$HUMAN_AGENT"

    # copy command script
    cp {TASK_PY} "$HUMAN_AGENT"

    # append to .bashrc
    cat {BASHRC} >> ~/{BASHRC}
    """).lstrip()
204
+
205
+
206
async def checked_exec(
    cmd: list[str],
    input: str | bytes | None = None,
    cwd: str | None = None,
) -> str:
    """Run a command in the sandbox and return its standard output.

    Args:
        cmd: Command and arguments to execute.
        input: Optional data passed through as the command's input.
        cwd: Optional working directory for the command.

    Raises:
        RuntimeError: If the command does not succeed; the message carries
            the command line and its standard error.
    """
    outcome = await sandbox().exec(cmd, input=input, cwd=cwd)
    if outcome.success:
        return outcome.stdout
    raise RuntimeError(f"Error executing command {' '.join(cmd)}: {outcome.stderr}")
215
+
216
+
217
async def checked_write_file(
    file: str, contents: str, executable: bool = False
) -> None:
    """Write `contents` to `file` in the sandbox, optionally making it executable.

    The file is written by piping the contents into `tee`.

    Raises:
        RuntimeError: If writing the file or changing its mode fails.
    """
    # "--" keeps a file name that starts with "-" from being read as an option
    await checked_exec(["tee", "--", file], input=contents)
    if not executable:
        return
    await checked_exec(["chmod", "+x", file])
@@ -0,0 +1,252 @@
1
+ from typing import cast
2
+
3
+ from textual.app import ComposeResult
4
+ from textual.containers import (
5
+ Container,
6
+ Horizontal,
7
+ VerticalScroll,
8
+ )
9
+ from textual.reactive import reactive
10
+ from textual.widgets import (
11
+ Button,
12
+ ContentSwitcher,
13
+ Label,
14
+ Link,
15
+ LoadingIndicator,
16
+ Static,
17
+ )
18
+
19
+ from inspect_ai._util.format import format_progress_time
20
+ from inspect_ai._util.vscode import (
21
+ VSCodeCommand,
22
+ can_execute_vscode_commands,
23
+ execute_vscode_commands,
24
+ )
25
+ from inspect_ai.util import InputPanel, SandboxConnection, throttle
26
+
27
+ from .state import HumanAgentState
28
+
29
+
30
class HumanAgentPanel(InputPanel):
    """Input panel showing the task status and how to log in to the sandbox.

    A loading view is displayed until a sandbox connection is provided; the
    panel then switches to a view with the status bar, login instructions,
    VS Code login links and the plain login command.
    """

    DEFAULT_TITLE = "Human Agent"

    # widget ids
    SANDBOX_VIEW_ID = "human-agent-sandbox-view"
    SANDBOX_INSTRUCTIONS_ID = "sandbox-instructions"
    VSCODE_LINKS_ID = "vscode-links"
    LOGIN_VSCODE_TERMINAL_ID = "login-vscode-terminal"
    LOGIN_VSCODE_WINDOW_ID = "login-vscode-window"
    COMMAND_INSTRUCTIONS_ID = "command-instructions"
    SANDBOX_COMMAND_ID = "sandbox-command"

    # css classes
    INSTRUCTIONS_CLASS = "instructions"
    LINK_LABEL_CLASS = "link-label"

    DEFAULT_CSS = f"""
    #{SANDBOX_VIEW_ID} {{
        scrollbar-size-vertical: 1;
    }}
    HumanAgentPanel .{INSTRUCTIONS_CLASS} {{
        color: $text-muted;
        margin-bottom: 1;
    }}
    #{SANDBOX_COMMAND_ID} {{
        color: $secondary;
    }}
    HumanAgentPanel .{LINK_LABEL_CLASS} {{
        color: $text-muted;
    }}
    HumanAgentPanel VSCodeLink {{
        margin-left: 1;
        margin-right: 2;
    }}
    HumanAgentPanel #{VSCODE_LINKS_ID} {{
        height: 1;
        margin-bottom: 1;
    }}
    """

    # assigning a connection triggers watch_connection()
    connection: reactive[SandboxConnection | None] = reactive(None)

    # implement HumanAgentView
    def connect(self, connection: SandboxConnection) -> None:
        """Record the sandbox connection to present to the user."""
        self.connection = connection

    # NOTE(review): presumably limits updates to one per second -- confirm
    # against the `throttle` helper in inspect_ai.util
    @throttle(1)
    def update_state(self, state: HumanAgentState) -> None:
        """Copy the running flag and elapsed time onto the status bar."""
        bar = self.query_one(StatusBar)
        bar.running = state.running
        bar.time = state.time

    def compose(self) -> ComposeResult:
        """Lay out the loading view and the (initially hidden) sandbox view."""
        with ContentSwitcher(initial=LoadingView.ID):
            # shown until a connection is available
            yield LoadingView()

            # shown once connected
            with VerticalScroll(id=self.SANDBOX_VIEW_ID):
                yield StatusBar()
                yield Static(
                    id=self.SANDBOX_INSTRUCTIONS_ID, classes=self.INSTRUCTIONS_CLASS
                )

                # one row of links for logging in from VS Code
                with Horizontal(id=self.VSCODE_LINKS_ID):
                    yield Label("Login:", classes=self.LINK_LABEL_CLASS)
                    yield VSCodeLink(
                        "VS Code Window",
                        id=self.LOGIN_VSCODE_WINDOW_ID,
                    )
                    yield Label("Login:", classes=self.LINK_LABEL_CLASS)
                    yield VSCodeLink(
                        "VS Code Terminal",
                        id=self.LOGIN_VSCODE_TERMINAL_ID,
                    )

                # plain command for logging in from any terminal
                yield Static(
                    id=self.COMMAND_INSTRUCTIONS_ID, classes=self.INSTRUCTIONS_CLASS
                )
                yield Static(id=self.SANDBOX_COMMAND_ID)

    def watch_connection(self, connection: SandboxConnection | None) -> None:
        """Populate the sandbox view when a connection is assigned."""
        if not connection:
            return

        # switch to sandbox view
        self.query_one(ContentSwitcher).current = self.SANDBOX_VIEW_ID

        # note whether we are in vscode
        vscode = can_execute_vscode_commands()

        # the closing sentence of the instructions depends on whether we are in vscode
        instructions_command = "Login to the system with the following command (hold down Alt or Option to select text for copying):"
        instructions_vscode = (
            "Use the links below to login to the system within VS Code:"
        )
        login_hint = instructions_vscode if vscode else instructions_command

        # update instructions
        instructions_lbl = cast(
            Static, self.query_one(f"#{self.SANDBOX_INSTRUCTIONS_ID}")
        )
        instructions = f"""You are completing a task on a Linux system (task instructions will be presented when you login). {login_hint}"""
        instructions_lbl.update(instructions)

        # login: vscode terminal (the links row is only displayed inside vscode)
        links_row = self.query_one(f"#{self.VSCODE_LINKS_ID}")
        links_row.display = vscode
        terminal_link = cast(
            VSCodeLink, self.query_one(f"#{self.LOGIN_VSCODE_TERMINAL_ID}")
        )
        open_terminal = VSCodeCommand(
            command="workbench.action.terminal.new", args=[{"location": 2}]
        )
        send_login = VSCodeCommand(
            command="workbench.action.terminal.sendSequence",
            args=[{"text": f"{connection.command}\n"}],
        )
        terminal_link.commands = [open_terminal, send_login]

        # login: vscode window (only when the connection provides a command)
        window_link = cast(
            VSCodeLink, self.query_one(f"#{self.LOGIN_VSCODE_WINDOW_ID}")
        )
        if connection.vscode_command is not None:
            window_link.commands = [
                VSCodeCommand(
                    command=connection.vscode_command[0],
                    args=connection.vscode_command[1:],
                )
            ]

        # command (always available); the extra instructions are only
        # displayed inside vscode, where they act as an alternative
        command_instructions_lbl = cast(
            Static, self.query_one(f"#{self.COMMAND_INSTRUCTIONS_ID}")
        )
        command_instructions_lbl.display = vscode
        command_instructions_lbl.update(
            instructions_command.replace("Login", "Alternatively, login", 1)
        )
        command_lbl = cast(Static, self.query_one(f"#{self.SANDBOX_COMMAND_ID}"))
        command_lbl.update(connection.command)
163
+
164
+
165
class StatusBar(Horizontal):
    """Single-row bar showing whether the task is running and the elapsed time."""

    # widget ids
    STATUS_ID = "task-status"
    TIME_ID = "task-time"

    # css classes
    LABEL_CLASS = "status-label"
    VALUE_CLASS = "status-value"

    DEFAULT_CSS = f"""
    StatusBar {{
        width: 1fr;
        height: 1;
        background: $surface;
        margin-bottom: 1;
        layout: grid;
        grid-size: 4 1;
        grid-columns: auto auto auto auto;
        grid-gutter: 1;
    }}
    .{LABEL_CLASS} {{
        color: $primary;
    }}
    .{VALUE_CLASS} {{
        color: $foreground;
    }}
    StatusBar Link {{
        dock: right;
        margin-right: 1;
    }}
    """

    # changes are rendered by watch_running() / watch_time()
    running: reactive[bool] = reactive(True)
    time: reactive[float] = reactive(0)

    def __init__(self) -> None:
        super().__init__()

    def compose(self) -> ComposeResult:
        """Yield the label/value pairs for status and time."""
        yield Label("Status:", classes=self.LABEL_CLASS)
        yield Static("Running", id=self.STATUS_ID, classes=self.VALUE_CLASS)
        yield Label(" Time:", classes=self.LABEL_CLASS)
        yield Static("0:00:00", id=self.TIME_ID, classes=self.VALUE_CLASS)

    def watch_running(self, running: bool) -> None:
        """Show "Running" or "Stopped" in the status value."""
        status_text = "Running" if running else "Stopped"
        status_value = cast(Static, self.query_one(f"#{self.STATUS_ID}"))
        status_value.update(status_text)

    def watch_time(self, time: float) -> None:
        """Show the formatted elapsed time in the time value."""
        time_value = cast(Static, self.query_one(f"#{self.TIME_ID}"))
        time_value.update(format_progress_time(time))
215
+
216
+
217
class LoadingView(Container):
    """Placeholder view shown while there is no sandbox connection yet."""

    ID = "human-agent-loading-view"

    def __init__(self) -> None:
        # fixed id so the content switcher can address this view
        super().__init__(id=self.ID)

    def compose(self) -> ComposeResult:
        """Yield a loading indicator plus a focusable button."""
        yield LoadingIndicator()
        # add focusable widget so the tab can activate
        yield Button()
226
+
227
+
228
class VSCodeLink(Link):
    """Link that executes a list of VS Code commands when clicked.

    The constructor takes the same arguments as `Link`; the commands are
    assigned afterwards through the `commands` attribute.
    """

    def __init__(
        self,
        text: str,
        *,
        url: str | None = None,
        tooltip: str | None = None,
        name: str | None = None,
        id: str | None = None,
        classes: str | None = None,
        disabled: bool = False,
    ) -> None:
        # forward everything unchanged to Link
        super().__init__(
            text,
            url=url,
            tooltip=tooltip,
            name=name,
            id=id,
            classes=classes,
            disabled=disabled,
        )
        # nothing to execute until the owner assigns commands
        self.commands: list[VSCodeCommand] = []

    def on_click(self) -> None:
        """Execute the currently assigned commands."""
        execute_vscode_commands(self.commands)
@@ -0,0 +1,45 @@
1
+ from inspect_ai.model import ModelOutput
2
+ from inspect_ai.util._sandbox import sandbox
3
+ from inspect_ai.util._sandbox.service import sandbox_service
4
+
5
+ from .._task_state import TaskState
6
+ from .commands.command import HumanAgentCommand
7
+ from .state import HumanAgentState
8
+ from .view import HumanAgentView
9
+
10
+
11
async def run_human_agent_service(
    state: TaskState, commands: list[HumanAgentCommand], view: HumanAgentView | None
) -> TaskState:
    """Run the human agent sandbox service until an answer is submitted.

    Args:
        state: Task state; its messages provide the instructions and its
            output receives the submitted answer.
        commands: Commands whose service handlers are exposed (only those
            with "service" in `contexts`).
        view: Optional view that is updated with the agent state each time
            the service checks for completion.

    Returns:
        The task state that was passed in.
    """
    # the instructions are the text of all messages, separated by blank lines
    message_texts = (message.text for message in state.messages)
    agent_state = HumanAgentState(instructions="\n\n".join(message_texts).strip())

    # collect the service method of every command that offers one
    methods = {}
    for command in commands:
        if "service" in command.contexts:
            methods[command.name] = command.service(agent_state)

    # completion check (doubles as the hook for refreshing the view)
    def task_is_completed() -> bool:
        if view:
            view.update_state(agent_state)
        return agent_state.answer is not None

    # run the service
    await sandbox_service(
        name="human_agent",
        methods=methods,
        until=task_is_completed,
        sandbox=sandbox(),
    )

    # publish the answer (if any) as the task output
    if agent_state.answer is not None:
        state.output = ModelOutput.from_content("human_agent", agent_state.answer)

    return state