inspect-ai 0.3.55__py3-none-any.whl → 0.3.56__py3-none-any.whl

This diff compares the contents of two publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (95)
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/progress.py +0 -6
  8. inspect_ai/_display/core/rich.py +1 -1
  9. inspect_ai/_display/rich/display.py +2 -2
  10. inspect_ai/_display/textual/app.py +15 -17
  11. inspect_ai/_display/textual/widgets/clock.py +3 -3
  12. inspect_ai/_display/textual/widgets/samples.py +6 -13
  13. inspect_ai/_eval/context.py +9 -1
  14. inspect_ai/_eval/score.py +4 -10
  15. inspect_ai/_eval/task/results.py +5 -4
  16. inspect_ai/_eval/task/run.py +6 -12
  17. inspect_ai/_eval/task/task.py +10 -0
  18. inspect_ai/_util/ansi.py +31 -0
  19. inspect_ai/_util/format.py +7 -0
  20. inspect_ai/_util/logger.py +12 -12
  21. inspect_ai/_util/throttle.py +10 -1
  22. inspect_ai/_util/trace.py +43 -47
  23. inspect_ai/_util/transcript.py +4 -0
  24. inspect_ai/_util/vscode.py +51 -0
  25. inspect_ai/_view/notify.py +2 -1
  26. inspect_ai/_view/www/App.css +22 -1
  27. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  28. inspect_ai/_view/www/dist/assets/index.js +29622 -24424
  29. inspect_ai/_view/www/log-schema.json +138 -90
  30. inspect_ai/_view/www/package.json +1 -0
  31. inspect_ai/_view/www/src/App.mjs +1 -0
  32. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  33. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  34. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  35. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  36. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  37. inspect_ai/_view/www/src/components/Tools.mjs +11 -3
  38. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  39. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  40. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  41. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  42. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  43. inspect_ai/_view/www/src/types/log.d.ts +26 -12
  44. inspect_ai/_view/www/yarn.lock +44 -0
  45. inspect_ai/approval/_apply.py +4 -0
  46. inspect_ai/approval/_human/panel.py +5 -8
  47. inspect_ai/dataset/_dataset.py +51 -10
  48. inspect_ai/dataset/_util.py +31 -3
  49. inspect_ai/log/__init__.py +2 -0
  50. inspect_ai/log/_log.py +5 -2
  51. inspect_ai/model/_call_tools.py +4 -2
  52. inspect_ai/model/_chat_message.py +3 -0
  53. inspect_ai/model/_model.py +42 -1
  54. inspect_ai/model/_providers/anthropic.py +4 -0
  55. inspect_ai/model/_render.py +9 -2
  56. inspect_ai/scorer/_metric.py +12 -1
  57. inspect_ai/solver/__init__.py +2 -0
  58. inspect_ai/solver/_human_agent/agent.py +83 -0
  59. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  60. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  61. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  62. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  63. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  64. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  65. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  66. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  67. inspect_ai/solver/_human_agent/install.py +222 -0
  68. inspect_ai/solver/_human_agent/panel.py +252 -0
  69. inspect_ai/solver/_human_agent/service.py +45 -0
  70. inspect_ai/solver/_human_agent/state.py +55 -0
  71. inspect_ai/solver/_human_agent/view.py +24 -0
  72. inspect_ai/solver/_task_state.py +28 -2
  73. inspect_ai/tool/_tool.py +10 -2
  74. inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
  75. inspect_ai/util/__init__.py +8 -4
  76. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  77. inspect_ai/util/_panel.py +31 -9
  78. inspect_ai/util/_sandbox/__init__.py +0 -3
  79. inspect_ai/util/_sandbox/context.py +5 -1
  80. inspect_ai/util/_sandbox/docker/compose.py +16 -10
  81. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  82. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  83. inspect_ai/util/_sandbox/docker/util.py +2 -2
  84. inspect_ai/util/_sandbox/environment.py +6 -5
  85. inspect_ai/util/_sandbox/local.py +1 -1
  86. inspect_ai/util/_sandbox/service.py +22 -7
  87. inspect_ai/util/_store.py +5 -6
  88. inspect_ai/util/_store_model.py +110 -0
  89. inspect_ai/util/_throttle.py +32 -0
  90. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/METADATA +1 -1
  91. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/RECORD +95 -73
  92. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/LICENSE +0 -0
  93. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/WHEEL +0 -0
  94. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/entry_points.txt +0 -0
  95. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.56.dist-info}/top_level.txt +0 -0
inspect_ai/_util/trace.py CHANGED
@@ -8,10 +8,10 @@ import shutil
8
8
  import time
9
9
  import traceback
10
10
  from contextlib import contextmanager
11
- from logging import Logger
12
- from logging.handlers import RotatingFileHandler
11
+ from dataclasses import dataclass
12
+ from logging import FileHandler, Logger
13
13
  from pathlib import Path
14
- from typing import Any, Generator, Literal, TextIO
14
+ from typing import Any, Callable, Generator, Literal, TextIO
15
15
 
16
16
  import jsonlines
17
17
  from pydantic import BaseModel, Field, JsonValue
@@ -25,6 +25,10 @@ def inspect_trace_dir() -> Path:
25
25
  return inspect_data_dir("traces")
26
26
 
27
27
 
28
+ def inspect_trace_file() -> Path:
29
+ return inspect_trace_dir() / f"trace-{os.getpid()}.log"
30
+
31
+
28
32
  @contextmanager
29
33
  def trace_action(
30
34
  logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
@@ -32,7 +36,6 @@ def trace_action(
32
36
  trace_id = uuid()
33
37
  start_monotonic = time.monotonic()
34
38
  start_wall = time.time()
35
- pid = os.getpid()
36
39
  detail = message % args if args else message % kwargs if kwargs else message
37
40
 
38
41
  def trace_message(event: str) -> str:
@@ -47,7 +50,6 @@ def trace_action(
47
50
  "event": "enter",
48
51
  "trace_id": str(trace_id),
49
52
  "start_time": start_wall,
50
- "pid": pid,
51
53
  },
52
54
  )
53
55
 
@@ -63,7 +65,6 @@ def trace_action(
63
65
  "event": "exit",
64
66
  "trace_id": str(trace_id),
65
67
  "duration": duration,
66
- "pid": pid,
67
68
  },
68
69
  )
69
70
  except (KeyboardInterrupt, asyncio.CancelledError):
@@ -77,7 +78,6 @@ def trace_action(
77
78
  "event": "cancel",
78
79
  "trace_id": str(trace_id),
79
80
  "duration": duration,
80
- "pid": pid,
81
81
  },
82
82
  )
83
83
  raise
@@ -92,7 +92,6 @@ def trace_action(
92
92
  "event": "timeout",
93
93
  "trace_id": str(trace_id),
94
94
  "duration": duration,
95
- "pid": pid,
96
95
  },
97
96
  )
98
97
  raise
@@ -110,7 +109,6 @@ def trace_action(
110
109
  "error": getattr(ex, "message", str(ex)) or repr(ex),
111
110
  "error_type": type(ex).__name__,
112
111
  "stacktrace": traceback.format_exc(),
113
- "pid": pid,
114
112
  },
115
113
  )
116
114
  raise
@@ -153,7 +151,6 @@ class TraceFormatter(logging.Formatter):
153
151
  "error",
154
152
  "error_type",
155
153
  "stacktrace",
156
- "pid",
157
154
  ]:
158
155
  if hasattr(record, key):
159
156
  output[key] = getattr(record, key)
@@ -215,7 +212,22 @@ class ActionTraceRecord(TraceRecord):
215
212
  error: str | None = Field(default=None)
216
213
  error_type: str | None = Field(default=None)
217
214
  stacktrace: str | None = Field(default=None)
218
- pid: int | None = Field(default=None)
215
+
216
+
217
+ @dataclass
218
+ class TraceFile:
219
+ file: Path
220
+ mtime: float
221
+
222
+
223
+ def list_trace_files() -> list[TraceFile]:
224
+ trace_files: list[TraceFile] = [
225
+ TraceFile(file=f, mtime=f.lstat().st_mtime)
226
+ for f in inspect_trace_dir().iterdir()
227
+ if f.is_file()
228
+ ]
229
+ trace_files.sort(key=lambda f: f.mtime, reverse=True)
230
+ return trace_files
219
231
 
220
232
 
221
233
  def read_trace_file(file: Path) -> list[TraceRecord]:
@@ -237,39 +249,23 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
237
249
  return read_file(f)
238
250
 
239
251
 
240
- class TraceFileHandler(RotatingFileHandler):
241
- def __init__(
242
- self,
243
- filename: str,
244
- mode: str = "a",
245
- maxBytes: int = 0,
246
- backupCount: int = 0,
247
- encoding: str | None = None,
248
- delay: bool = False,
249
- ) -> None:
250
- super().__init__(filename, mode, maxBytes, backupCount, encoding, delay)
251
-
252
- def rotation_filename(self, default_name: str) -> str:
253
- """
254
- Returns the name of the rotated file.
255
-
256
- Args:
257
- default_name: The default name that would be used for rotation
258
-
259
- Returns:
260
- The modified filename with .gz extension
261
- """
262
- return default_name + ".gz"
263
-
264
- def rotate(self, source: str, dest: str) -> None:
265
- """
266
- Compresses the source file and moves it to destination.
267
-
268
- Args:
269
- source: The source file to be compressed
270
- dest: The destination path for the compressed file
271
- """
272
- with open(source, "rb") as f_in:
273
- with gzip.open(dest, "wb") as f_out:
274
- shutil.copyfileobj(f_in, f_out)
275
- os.remove(source)
252
+ def rotate_trace_files() -> None:
253
+ rotate_files = list_trace_files()[10:]
254
+ for file in rotate_files:
255
+ file.file.unlink(missing_ok=True)
256
+
257
+
258
+ def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:
259
+ def compress() -> None:
260
+ # ensure log is closed
261
+ log_handler.close()
262
+
263
+ # compress
264
+ trace_file = Path(log_handler.baseFilename)
265
+ if trace_file.exists():
266
+ with open(trace_file, "rb") as f_in:
267
+ with gzip.open(trace_file.with_suffix(".log.gz"), "wb") as f_out:
268
+ shutil.copyfileobj(f_in, f_out)
269
+ trace_file.unlink()
270
+
271
+ return compress
inspect_ai/_util/transcript.py CHANGED
@@ -89,6 +89,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
89
89
  return transcript_markdown("```python\n" + call + "\n```\n")
90
90
 
91
91
 
92
+ DOUBLE_LINE = Box(
93
+ " ══ \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n"
94
+ )
95
+
92
96
  LINE = Box(" ── \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
93
97
 
94
98
  DOTTED = Box(" ·· \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
inspect_ai/_util/vscode.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ from pydantic import BaseModel, Field
6
+ from pydantic_core import to_json
7
+ from shortuuid import uuid
8
+
9
+ from .appdirs import inspect_data_dir
10
+
11
+
12
+ class VSCodeCommand(BaseModel):
13
+ command: str
14
+ args: list[Any] = Field(default_factory=list)
15
+
16
+
17
+ def execute_vscode_commands(commands: VSCodeCommand | list[VSCodeCommand]) -> None:
18
+ # resolve to list
19
+ commands = commands if isinstance(commands, list) else [commands]
20
+
21
+ # ensure there is someone listening
22
+ command_dir = vs_code_commands_dir()
23
+ if command_dir is None:
24
+ raise NotImplementedError(
25
+ "Not running in VS Code session or have older version of Inspect AI extension"
26
+ )
27
+
28
+ command_file = command_dir / uuid()
29
+ with open(command_file, "w") as f:
30
+ f.write(to_json(commands).decode())
31
+
32
+
33
+ def can_execute_vscode_commands() -> bool:
34
+ return vs_code_commands_dir() is not None
35
+
36
+
37
+ def vs_code_commands_dir() -> Path | None:
38
+ workspace_id = vscode_workspace_id()
39
+ if workspace_id:
40
+ workspace_dir = inspect_data_dir(os.path.join("vscode", workspace_id))
41
+ if workspace_dir.exists():
42
+ commands_dir = workspace_dir / "commands"
43
+ return commands_dir if commands_dir.exists() else None
44
+ else:
45
+ return None
46
+ else:
47
+ return None
48
+
49
+
50
+ def vscode_workspace_id() -> str | None:
51
+ return os.environ.get("INSPECT_WORKSPACE_ID", None)
inspect_ai/_view/notify.py CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from urllib.parse import urlparse
5
5
 
6
6
  from inspect_ai._util.appdirs import inspect_data_dir
7
+ from inspect_ai._util.vscode import vscode_workspace_id
7
8
 
8
9
  # lightweight tracking of when the last eval task completed
9
10
  # this enables the view client to poll for changes frequently
@@ -24,7 +25,7 @@ def view_notify_eval(location: str) -> None:
24
25
  payload = {
25
26
  "location": location,
26
27
  }
27
- workspace_id = os.environ.get("INSPECT_WORKSPACE_ID")
28
+ workspace_id = vscode_workspace_id()
28
29
  if workspace_id:
29
30
  payload["workspace_id"] = workspace_id
30
31
 
inspect_ai/_view/www/App.css CHANGED
@@ -235,6 +235,10 @@ body[class^="vscode-"] .sidebar .list-group {
235
235
  --bs-list-group-active-color: var(--vscode-sideBarSectionHeader-foreground);
236
236
  }
237
237
 
238
+ body[class^="vscode-"] div.ap-control-bar .ap-fullscreen-button {
239
+ display: none;
240
+ }
241
+
238
242
  :root {
239
243
  --bs-navbar-padding-y: 0;
240
244
  --bs-navbar-brand-padding-y: 0;
@@ -690,7 +694,7 @@ table.table.table-sm td {
690
694
  }
691
695
 
692
696
  @keyframes moveLeftToRight {
693
- from {
697
+ from {
694
698
  margin-left: 0;
695
699
  }
696
700
  to {
@@ -724,6 +728,23 @@ pre[class*="language-"].tool-output {
724
728
  border-radius: var(--bs-border-radius) !important;
725
729
  }
726
730
 
731
+ /* lightbox styles */
732
+
733
+
734
+ .lightbox-overlay .close-button,
735
+ .lightbox-overlay .nav-button {
736
+ /* Hide by default */
737
+ opacity: 0;
738
+ pointer-events: none; /* so it doesn't register clicks when hidden */
739
+ transition: opacity 0.3s ease;
740
+ }
741
+
742
+ .lightbox-overlay:hover .close-button,
743
+ .lightbox-overlay .nav-button {
744
+ /* Show on hover */
745
+ opacity: 1;
746
+ pointer-events: auto;
747
+ }
727
748
 
728
749
  /* jsondiffpatch */
729
750