inspect-ai 0.3.56__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. inspect_ai/_display/core/panel.py +1 -1
  2. inspect_ai/_eval/run.py +16 -11
  3. inspect_ai/_util/datetime.py +1 -1
  4. inspect_ai/_util/deprecation.py +1 -1
  5. inspect_ai/_util/json.py +11 -1
  6. inspect_ai/_util/logger.py +2 -1
  7. inspect_ai/_util/trace.py +39 -3
  8. inspect_ai/_util/transcript.py +36 -7
  9. inspect_ai/_view/www/.prettierrc.js +12 -0
  10. inspect_ai/_view/www/dist/assets/index.js +286 -224
  11. inspect_ai/_view/www/log-schema.json +124 -125
  12. inspect_ai/_view/www/src/App.mjs +18 -9
  13. inspect_ai/_view/www/src/Types.mjs +0 -1
  14. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  15. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  16. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  17. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  18. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  19. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  20. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  21. inspect_ai/_view/www/src/components/Tools.mjs +18 -3
  22. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  23. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  24. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  25. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  26. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  27. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  28. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  29. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  30. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  31. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  32. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  33. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  34. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  35. inspect_ai/_view/www/src/types/log.d.ts +2 -8
  36. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  37. inspect_ai/log/_log.py +25 -0
  38. inspect_ai/log/_recorders/eval.py +2 -0
  39. inspect_ai/model/_call_tools.py +27 -5
  40. inspect_ai/model/_providers/google.py +24 -6
  41. inspect_ai/model/_providers/openai.py +17 -3
  42. inspect_ai/model/_providers/openai_o1.py +10 -12
  43. inspect_ai/tool/_tool_info.py +2 -1
  44. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  45. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
  46. inspect_ai/util/__init__.py +4 -0
  47. inspect_ai/util/_sandbox/docker/compose.py +1 -3
  48. inspect_ai/util/_sandbox/docker/util.py +2 -1
  49. inspect_ai/util/_sandbox/self_check.py +18 -18
  50. inspect_ai/util/_store.py +2 -2
  51. inspect_ai/util/_subprocess.py +3 -3
  52. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  53. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +57 -56
  54. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  55. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  56. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  57. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -112,7 +112,7 @@ def tasks_title(completed: int, total: int) -> str:
112
112
  def task_title(profile: TaskProfile, show_model: bool) -> str:
113
113
  eval_epochs = profile.eval_config.epochs or 1
114
114
  epochs = f" x {profile.eval_config.epochs}" if eval_epochs > 1 else ""
115
- samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
115
+ samples = f"{profile.samples // eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
116
116
  title = f"{registry_unqualified_name(profile.name)} ({samples})"
117
117
  if show_model:
118
118
  title = f"{title}: {profile.model}"
inspect_ai/_eval/run.py CHANGED
@@ -42,7 +42,7 @@ from .task.log import TaskLogger
42
42
  from .task.run import TaskRunOptions, task_run
43
43
  from .task.rundir import task_run_dir_switching
44
44
  from .task.sandbox import TaskSandboxEnvironment, resolve_sandbox_for_task
45
- from .task.util import task_run_dir
45
+ from .task.util import slice_dataset, task_run_dir
46
46
 
47
47
  log = logging.getLogger(__name__)
48
48
 
@@ -70,12 +70,23 @@ async def eval_run(
70
70
  # get cwd before switching to task dir
71
71
  eval_wd = os.getcwd()
72
72
 
73
+ # ensure sample ids
74
+ for resolved_task in tasks:
75
+ # add sample ids to dataset if they aren't there (start at 1 not 0)
76
+ task = resolved_task.task
77
+ for id, sample in enumerate(task.dataset):
78
+ if sample.id is None:
79
+ sample.id = id + 1
80
+
81
+ # Ensure sample ids are unique
82
+ ensure_unique_ids(task.dataset)
83
+
73
84
  # run startup pass for the sandbox environments
74
85
  shutdown_sandbox_environments: Callable[[], Awaitable[None]] | None = None
75
86
  if has_sandbox:
76
87
  cleanup = eval_config.sandbox_cleanup is not False
77
88
  shutdown_sandbox_environments = await startup_sandbox_environments(
78
- resolve_sandbox_environment(eval_sandbox), tasks, cleanup
89
+ resolve_sandbox_environment(eval_sandbox), tasks, eval_config, cleanup
79
90
  )
80
91
 
81
92
  # resolve solver and solver spec
@@ -146,14 +157,6 @@ async def eval_run(
146
157
  else:
147
158
  task.fail_on_error = task_eval_config.fail_on_error
148
159
 
149
- # add sample ids to dataset if they aren't there (start at 1 not 0)
150
- for id, sample in enumerate(task.dataset):
151
- if sample.id is None:
152
- sample.id = id + 1
153
-
154
- # Ensure sample ids are unique
155
- ensure_unique_ids(task.dataset)
156
-
157
160
  # create and track the logger
158
161
  logger = TaskLogger(
159
162
  task_name=task.name,
@@ -340,13 +343,15 @@ async def run_multiple(tasks: list[TaskRunOptions], parallel: int) -> list[EvalL
340
343
  async def startup_sandbox_environments(
341
344
  eval_sandbox: SandboxEnvironmentSpec | None,
342
345
  tasks: list[ResolvedTask],
346
+ config: EvalConfig,
343
347
  cleanup: bool,
344
348
  ) -> Callable[[], Awaitable[None]]:
345
349
  # find unique sandboxenvs
346
350
  sandboxenvs: Set[TaskSandboxEnvironment] = set()
347
351
  for task in tasks:
348
352
  # resolve each sample and add to sandboxenvs
349
- for sample in task.task.dataset:
353
+ dataset = slice_dataset(task.task.dataset, config.limit, config.sample_id)
354
+ for sample in dataset:
350
355
  sandbox = resolve_sandbox_for_task(eval_sandbox, task.task, sample)
351
356
  if sandbox is not None and sandbox not in sandboxenvs:
352
357
  sandboxenvs.add(sandbox)
@@ -4,7 +4,7 @@ from typing import Literal
4
4
 
5
5
  def iso_now(
6
6
  timespec: Literal[
7
- "auto", "hours", "minutes", "seconds", "milliseconds" "microseconds"
7
+ "auto", "hours", "minutes", "seconds", "milliseconds", "microseconds"
8
8
  ] = "seconds",
9
9
  ) -> str:
10
10
  return datetime.now().astimezone().isoformat(timespec=timespec)
@@ -174,7 +174,7 @@ def default_deprecation_msg(
174
174
 
175
175
  _qual = getattr(obj, "__qualname__", "") or ""
176
176
  if _qual.endswith(".__init__") or _qual.endswith(".__new__"):
177
- _obj = f' class ({_qual.rsplit(".", 1)[0]})'
177
+ _obj = f" class ({_qual.rsplit('.', 1)[0]})"
178
178
  elif _qual and _obj:
179
179
  _obj += f" ({_qual})"
180
180
 
inspect_ai/_util/json.py CHANGED
@@ -103,10 +103,20 @@ def json_changes(
103
103
  paths = json_change.path.split("/")[1:]
104
104
  replaced = before
105
105
  for path in paths:
106
- index: Any = int(path) if path.isnumeric() else path
106
+ decoded_path = decode_json_pointer_segment(path)
107
+ index: Any = (
108
+ int(decoded_path) if decoded_path.isnumeric() else decoded_path
109
+ )
107
110
  replaced = replaced[index]
108
111
  json_change.replaced = replaced
109
112
  changes.append(json_change)
110
113
  return changes
111
114
  else:
112
115
  return None
116
+
117
+
118
+ def decode_json_pointer_segment(segment: str) -> str:
119
+ """Decode a single JSON Pointer segment."""
120
+ # JSON Pointers encode ~ and / because they are special characters
121
+ # this decodes these values (https://www.rfc-editor.org/rfc/rfc6901)
122
+ return segment.replace("~1", "/").replace("~0", "~")
@@ -1,5 +1,6 @@
1
1
  import atexit
2
2
  import os
3
+ import re
3
4
  from logging import (
4
5
  DEBUG,
5
6
  INFO,
@@ -182,7 +183,7 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
182
183
  if write:
183
184
  transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
184
185
  global _rate_limit_count
185
- if (record.levelno <= INFO and "429" in record.getMessage()) or (
186
+ if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
186
187
  record.levelno == DEBUG
187
188
  # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
188
189
  # for boto retry logic / log messages (this is tracking standard or adaptive retries)
inspect_ai/_util/trace.py CHANGED
@@ -33,6 +33,22 @@ def inspect_trace_file() -> Path:
33
33
  def trace_action(
34
34
  logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
35
35
  ) -> Generator[None, None, None]:
36
+ """Trace a long running or potentially unreliable action.
37
+
38
+ Trace actions for which you want to collect data on the resolution
39
+ (e.g. succeeded, cancelled, failed, timed out, etc.) and duration of.
40
+
41
+ Traces are written to the `TRACE` log level (which is just below
42
+ `HTTP` and `INFO`). List and read trace logs with `inspect trace list`
43
+ and related commands (see `inspect trace --help` for details).
44
+
45
+ Args:
46
+ logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
47
+ action (str): Name of action to trace (e.g. 'Model', 'Subprocess', etc.)
48
+ message (str): Message describing action (can be a format string w/ args or kwargs)
49
+ *args (Any): Positional arguments for `message` format string.
50
+ **kwargs (Any): Named args for `message` format string.
51
+ """
36
52
  trace_id = uuid()
37
53
  start_monotonic = time.monotonic()
38
54
  start_wall = time.time()
@@ -117,6 +133,19 @@ def trace_action(
117
133
  def trace_message(
118
134
  logger: Logger, category: str, message: str, *args: Any, **kwargs: Any
119
135
  ) -> None:
136
+ """Log a message using the TRACE log level.
137
+
138
+ The `TRACE` log level is just below `HTTP` and `INFO`. List and
139
+ read trace logs with `inspect trace list` and related commands
140
+ (see `inspect trace --help` for details).
141
+
142
+ Args:
143
+ logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
144
+ category (str): Category of trace message.
145
+ message (str): Trace message (can be a format string w/ args or kwargs)
146
+ *args (Any): Positional arguments for `message` format string.
147
+ **kwargs (Any): Named args for `message` format string.
148
+ """
120
149
  logger.log(TRACE, f"[{category}] {message}", *args, **kwargs)
121
150
 
122
151
 
@@ -250,9 +279,16 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
250
279
 
251
280
 
252
281
  def rotate_trace_files() -> None:
253
- rotate_files = list_trace_files()[10:]
254
- for file in rotate_files:
255
- file.file.unlink(missing_ok=True)
282
+ # if multiple inspect processes start up at once they
283
+ # will all be attempting to rotate at the same time,
284
+ # which can lead to FileNotFoundError -- ignore these
285
+ # errors if they occur
286
+ try:
287
+ rotate_files = list_trace_files()[10:]
288
+ for file in rotate_files:
289
+ file.file.unlink(missing_ok=True)
290
+ except FileNotFoundError:
291
+ pass
256
292
 
257
293
 
258
294
  def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:
@@ -1,4 +1,5 @@
1
1
  import html
2
+ import re
2
3
  from typing import Any
3
4
 
4
5
  from rich.align import AlignMethod
@@ -19,13 +20,43 @@ def transcript_code_theme() -> str:
19
20
  def transcript_markdown(content: str, *, escape: bool = False) -> Markdown:
20
21
  code_theme = transcript_code_theme()
21
22
  return Markdown(
22
- html.escape(content) if escape else content,
23
+ html_escape_markdown(content) if escape else content,
23
24
  code_theme=code_theme,
24
25
  inline_code_lexer="python",
25
26
  inline_code_theme=code_theme,
26
27
  )
27
28
 
28
29
 
30
+ def html_escape_markdown(content: str) -> str:
31
+ """Escape markdown lines that aren't in a code block."""
32
+ codeblock_pattern = re.compile("`{3,}")
33
+ current_codeblock = ""
34
+ escaped: list[str] = []
35
+ lines = content.splitlines()
36
+ for line in lines:
37
+ # look for matching end of codeblock
38
+ if current_codeblock:
39
+ if current_codeblock in line:
40
+ current_codeblock = ""
41
+ escaped.append(line)
42
+ continue
43
+
44
+ # look for beginning of codeblock
45
+ match = codeblock_pattern.search(line)
46
+ if match:
47
+ current_codeblock = match[0]
48
+ escaped.append(line)
49
+ continue
50
+
51
+ # escape if we are not in a codeblock
52
+ if current_codeblock:
53
+ escaped.append(line)
54
+ else:
55
+ escaped.append(html.escape(line, quote=False))
56
+
57
+ return "\n".join(escaped)
58
+
59
+
29
60
  def set_transcript_markdown_options(markdown: Markdown) -> None:
30
61
  code_theme = transcript_code_theme()
31
62
  markdown.code_theme = code_theme
@@ -89,12 +120,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
89
120
  return transcript_markdown("```python\n" + call + "\n```\n")
90
121
 
91
122
 
92
- DOUBLE_LINE = Box(
93
- " ══ \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n"
94
- )
123
+ DOUBLE_LINE = Box(" ══ \n \n \n \n \n \n \n \n")
95
124
 
96
- LINE = Box(" ── \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
125
+ LINE = Box(" ── \n \n \n \n \n \n \n \n")
97
126
 
98
- DOTTED = Box(" ·· \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
127
+ DOTTED = Box(" ·· \n \n \n \n \n \n \n \n")
99
128
 
100
- NOBORDER = Box(" \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
129
+ NOBORDER = Box(" \n \n \n \n \n \n \n \n")
@@ -0,0 +1,12 @@
1
+ // Do not remove this file even if the config is empty!
2
+ // VSCode's "Format Document" will respect this config and use the default
3
+ // settings, which is what we want. Without prettierrc, VSCode falls back to
4
+ // users settings, which could be different.
5
+
6
+ /**
7
+ * @see https://prettier.io/docs/en/configuration.html
8
+ * @type {import("prettier").Config}
9
+ */
10
+ const config = {};
11
+
12
+ export default config;