inspect-ai 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +4 -2
  3. inspect_ai/_cli/eval.py +2 -0
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +0 -2
  6. inspect_ai/_display/core/panel.py +1 -1
  7. inspect_ai/_display/rich/display.py +4 -4
  8. inspect_ai/_display/textual/app.py +4 -1
  9. inspect_ai/_display/textual/widgets/samples.py +41 -5
  10. inspect_ai/_eval/eval.py +32 -20
  11. inspect_ai/_eval/evalset.py +7 -5
  12. inspect_ai/_eval/run.py +16 -11
  13. inspect_ai/_eval/task/__init__.py +2 -2
  14. inspect_ai/_eval/task/images.py +40 -25
  15. inspect_ai/_eval/task/run.py +141 -119
  16. inspect_ai/_eval/task/task.py +140 -25
  17. inspect_ai/_util/constants.py +1 -0
  18. inspect_ai/_util/content.py +23 -1
  19. inspect_ai/_util/datetime.py +1 -1
  20. inspect_ai/_util/deprecation.py +1 -1
  21. inspect_ai/_util/images.py +20 -17
  22. inspect_ai/_util/json.py +11 -1
  23. inspect_ai/_util/kvstore.py +73 -0
  24. inspect_ai/_util/logger.py +2 -1
  25. inspect_ai/_util/notgiven.py +18 -0
  26. inspect_ai/_util/thread.py +5 -0
  27. inspect_ai/_util/trace.py +39 -3
  28. inspect_ai/_util/transcript.py +36 -7
  29. inspect_ai/_view/www/.prettierrc.js +12 -0
  30. inspect_ai/_view/www/dist/assets/index.js +322 -226
  31. inspect_ai/_view/www/log-schema.json +221 -138
  32. inspect_ai/_view/www/src/App.mjs +18 -9
  33. inspect_ai/_view/www/src/Types.mjs +0 -1
  34. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  35. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  36. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  37. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  38. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  39. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  40. inspect_ai/_view/www/src/components/MessageContent.mjs +44 -2
  41. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  42. inspect_ai/_view/www/src/components/Tools.mjs +18 -3
  43. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  44. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  46. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  47. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  48. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  49. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  50. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +242 -178
  51. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  52. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  53. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  54. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  55. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  56. inspect_ai/_view/www/src/types/log.d.ts +53 -35
  57. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  58. inspect_ai/approval/_human/util.py +2 -2
  59. inspect_ai/dataset/_sources/csv.py +2 -1
  60. inspect_ai/dataset/_sources/json.py +2 -1
  61. inspect_ai/dataset/_sources/util.py +15 -7
  62. inspect_ai/log/_condense.py +11 -1
  63. inspect_ai/log/_log.py +27 -5
  64. inspect_ai/log/_recorders/eval.py +21 -8
  65. inspect_ai/log/_samples.py +10 -5
  66. inspect_ai/log/_transcript.py +28 -1
  67. inspect_ai/model/__init__.py +10 -2
  68. inspect_ai/model/_call_tools.py +82 -17
  69. inspect_ai/model/_chat_message.py +2 -4
  70. inspect_ai/model/{_trace.py → _conversation.py} +9 -8
  71. inspect_ai/model/_model.py +2 -2
  72. inspect_ai/model/_providers/anthropic.py +9 -7
  73. inspect_ai/model/_providers/azureai.py +6 -4
  74. inspect_ai/model/_providers/bedrock.py +6 -4
  75. inspect_ai/model/_providers/google.py +103 -14
  76. inspect_ai/model/_providers/groq.py +7 -5
  77. inspect_ai/model/_providers/hf.py +11 -6
  78. inspect_ai/model/_providers/mistral.py +6 -9
  79. inspect_ai/model/_providers/openai.py +34 -8
  80. inspect_ai/model/_providers/openai_o1.py +10 -12
  81. inspect_ai/model/_providers/vertex.py +17 -4
  82. inspect_ai/scorer/__init__.py +13 -2
  83. inspect_ai/scorer/_metrics/__init__.py +2 -2
  84. inspect_ai/scorer/_metrics/std.py +3 -3
  85. inspect_ai/tool/__init__.py +9 -1
  86. inspect_ai/tool/_tool.py +9 -2
  87. inspect_ai/tool/_tool_info.py +2 -1
  88. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  89. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
  90. inspect_ai/util/__init__.py +4 -3
  91. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  92. inspect_ai/util/_display.py +14 -4
  93. inspect_ai/util/_sandbox/context.py +12 -13
  94. inspect_ai/util/_sandbox/docker/compose.py +24 -13
  95. inspect_ai/util/_sandbox/docker/docker.py +20 -13
  96. inspect_ai/util/_sandbox/docker/util.py +2 -1
  97. inspect_ai/util/_sandbox/environment.py +13 -1
  98. inspect_ai/util/_sandbox/local.py +1 -0
  99. inspect_ai/util/_sandbox/self_check.py +18 -18
  100. inspect_ai/util/_store.py +2 -2
  101. inspect_ai/util/_subprocess.py +3 -3
  102. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/METADATA +3 -3
  103. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/RECORD +107 -103
  104. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/WHEEL +1 -1
  105. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/LICENSE +0 -0
  106. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/entry_points.txt +0 -0
  107. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/top_level.txt +0 -0
@@ -25,5 +25,27 @@ class ContentImage(BaseModel):
25
25
  """
26
26
 
27
27
 
28
- Content = Union[ContentText, ContentImage]
28
+ class ContentAudio(BaseModel):
29
+ type: Literal["audio"] = Field(default="audio")
30
+ """Type."""
31
+
32
+ audio: str
33
+ """Audio file path or base64 encoded data URL."""
34
+
35
+ format: Literal["wav", "mp3"]
36
+ """Format of audio data ('mp3' or 'wav')"""
37
+
38
+
39
+ class ContentVideo(BaseModel):
40
+ type: Literal["video"] = Field(default="video")
41
+ """Type."""
42
+
43
+ video: str
44
+ """Video file path or base64 encoded data URL."""
45
+
46
+ format: Literal["mp4", "mpeg", "mov"]
47
+ """Format of video data ('mp4', 'mpeg', or 'mov')"""
48
+
49
+
50
+ Content = Union[ContentText, ContentImage, ContentAudio, ContentVideo]
29
51
  """Content sent to or received from a model."""
@@ -4,7 +4,7 @@ from typing import Literal
4
4
 
5
5
  def iso_now(
6
6
  timespec: Literal[
7
- "auto", "hours", "minutes", "seconds", "milliseconds" "microseconds"
7
+ "auto", "hours", "minutes", "seconds", "milliseconds", "microseconds"
8
8
  ] = "seconds",
9
9
  ) -> str:
10
10
  return datetime.now().astimezone().isoformat(timespec=timespec)
@@ -174,7 +174,7 @@ def default_deprecation_msg(
174
174
 
175
175
  _qual = getattr(obj, "__qualname__", "") or ""
176
176
  if _qual.endswith(".__init__") or _qual.endswith(".__new__"):
177
- _obj = f' class ({_qual.rsplit(".", 1)[0]})'
177
+ _obj = f" class ({_qual.rsplit('.', 1)[0]})"
178
178
  elif _qual and _obj:
179
179
  _obj += f" ({_qual})"
180
180
 
@@ -3,7 +3,7 @@ import mimetypes
3
3
 
4
4
  import httpx
5
5
 
6
- from .file import file
6
+ from .file import file as open_file
7
7
  from .url import (
8
8
  data_uri_mime_type,
9
9
  data_uri_to_base64,
@@ -12,34 +12,37 @@ from .url import (
12
12
  )
13
13
 
14
14
 
15
- async def image_as_data(image: str) -> tuple[bytes, str]:
16
- if is_data_uri(image):
15
+ async def file_as_data(file: str) -> tuple[bytes, str]:
16
+ if is_data_uri(file):
17
17
  # resolve mime type and base64 content
18
- mime_type = data_uri_mime_type(image) or "image/png"
19
- image_base64 = data_uri_to_base64(image)
20
- image_bytes = base64.b64decode(image_base64)
18
+ mime_type = data_uri_mime_type(file) or "image/png"
19
+ file_base64 = data_uri_to_base64(file)
20
+ file_bytes = base64.b64decode(file_base64)
21
21
  else:
22
22
  # guess mime type; need strict=False for webp images
23
- type, _ = mimetypes.guess_type(image, strict=False)
23
+ type, _ = mimetypes.guess_type(file, strict=False)
24
24
  if type:
25
25
  mime_type = type
26
26
  else:
27
27
  mime_type = "image/png"
28
28
 
29
29
  # handle url or file
30
- if is_http_url(image):
30
+ if is_http_url(file):
31
31
  client = httpx.AsyncClient()
32
- image_bytes = (await client.get(image)).content
32
+ file_bytes = (await client.get(file)).content
33
33
  else:
34
- with file(image, "rb") as f:
35
- image_bytes = f.read()
34
+ with open_file(file, "rb") as f:
35
+ file_bytes = f.read()
36
36
 
37
37
  # return bytes and type
38
- return image_bytes, mime_type
38
+ return file_bytes, mime_type
39
39
 
40
40
 
41
- async def image_as_data_uri(image: str) -> str:
42
- bytes, mime_type = await image_as_data(image)
43
- base64_image = base64.b64encode(bytes).decode("utf-8")
44
- image = f"data:{mime_type};base64,{base64_image}"
45
- return image
41
+ async def file_as_data_uri(file: str) -> str:
42
+ if is_data_uri(file):
43
+ return file
44
+ else:
45
+ bytes, mime_type = await file_as_data(file)
46
+ base64_file = base64.b64encode(bytes).decode("utf-8")
47
+ file = f"data:{mime_type};base64,{base64_file}"
48
+ return file
inspect_ai/_util/json.py CHANGED
@@ -103,10 +103,20 @@ def json_changes(
103
103
  paths = json_change.path.split("/")[1:]
104
104
  replaced = before
105
105
  for path in paths:
106
- index: Any = int(path) if path.isnumeric() else path
106
+ decoded_path = decode_json_pointer_segment(path)
107
+ index: Any = (
108
+ int(decoded_path) if decoded_path.isnumeric() else decoded_path
109
+ )
107
110
  replaced = replaced[index]
108
111
  json_change.replaced = replaced
109
112
  changes.append(json_change)
110
113
  return changes
111
114
  else:
112
115
  return None
116
+
117
+
118
+ def decode_json_pointer_segment(segment: str) -> str:
119
+ """Decode a single JSON Pointer segment."""
120
+ # JSON Pointers encode ~ and / because they are special characters
121
+ # this decodes these values (https://www.rfc-editor.org/rfc/rfc6901)
122
+ return segment.replace("~1", "/").replace("~0", "~")
@@ -0,0 +1,73 @@
1
+ import sqlite3
2
+ from contextlib import AbstractContextManager
3
+ from typing import Any, Optional, cast
4
+
5
+ from .appdirs import inspect_data_dir
6
+
7
+
8
+ class KVStore(AbstractContextManager["KVStore"]):
9
+ def __init__(self, filename: str, max_entries: int | None = None):
10
+ self.filename = filename
11
+ self.max_entries = max_entries
12
+
13
+ def __enter__(self) -> "KVStore":
14
+ self.conn = sqlite3.connect(self.filename)
15
+ self.conn.execute("""
16
+ CREATE TABLE IF NOT EXISTS kv_store (
17
+ key TEXT PRIMARY KEY,
18
+ value TEXT,
19
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
20
+ )
21
+ """)
22
+ self.conn.commit()
23
+ return self
24
+
25
+ def __exit__(self, *excinfo: Any) -> None:
26
+ self.conn.close()
27
+
28
+ def put(self, key: str, value: str) -> None:
29
+ # Insert or update the value
30
+ self.conn.execute(
31
+ """
32
+ INSERT OR REPLACE INTO kv_store (key, value, created_at)
33
+ VALUES (?, ?, CURRENT_TIMESTAMP)
34
+ """,
35
+ (key, value),
36
+ )
37
+
38
+ # If we have a max_entries limit, remove oldest entries
39
+ if self.max_entries:
40
+ count = self.count()
41
+ if count > self.max_entries:
42
+ self.conn.execute(
43
+ """
44
+ DELETE FROM kv_store
45
+ WHERE key IN (
46
+ SELECT key FROM kv_store
47
+ ORDER BY created_at ASC
48
+ LIMIT ?
49
+ )
50
+ """,
51
+ (max(0, count - self.max_entries),),
52
+ )
53
+
54
+ self.conn.commit()
55
+
56
+ def get(self, key: str) -> Optional[str]:
57
+ cursor = self.conn.execute("SELECT value FROM kv_store WHERE key = ?", (key,))
58
+ result = cursor.fetchone()
59
+ return result[0] if result else None
60
+
61
+ def delete(self, key: str) -> bool:
62
+ cursor = self.conn.execute("DELETE FROM kv_store WHERE key = ?", (key,))
63
+ self.conn.commit()
64
+ return cursor.rowcount > 0
65
+
66
+ def count(self) -> int:
67
+ cursor = self.conn.execute("SELECT COUNT(*) FROM kv_store")
68
+ return cast(int, cursor.fetchone()[0])
69
+
70
+
71
+ def inspect_kvstore(name: str, max_entries: int | None = None) -> KVStore:
72
+ filename = inspect_data_dir("kvstore") / f"{name}.db"
73
+ return KVStore(filename.as_posix(), max_entries=max_entries)
@@ -1,5 +1,6 @@
1
1
  import atexit
2
2
  import os
3
+ import re
3
4
  from logging import (
4
5
  DEBUG,
5
6
  INFO,
@@ -182,7 +183,7 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
182
183
  if write:
183
184
  transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
184
185
  global _rate_limit_count
185
- if (record.levelno <= INFO and "429" in record.getMessage()) or (
186
+ if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
186
187
  record.levelno == DEBUG
187
188
  # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
188
189
  # for boto retry logic / log messages (this is tracking standard or adaptive retries)
@@ -0,0 +1,18 @@
1
+ # Sentinel class used until PEP 0661 is accepted
2
+ from typing import Literal
3
+
4
+ from typing_extensions import override
5
+
6
+
7
+ class NotGiven:
8
+ """A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior)."""
9
+
10
+ def __bool__(self) -> Literal[False]:
11
+ return False
12
+
13
+ @override
14
+ def __repr__(self) -> str:
15
+ return "NOT_GIVEN"
16
+
17
+
18
+ NOT_GIVEN = NotGiven()
@@ -0,0 +1,5 @@
1
+ import threading
2
+
3
+
4
+ def is_main_thread() -> bool:
5
+ return threading.current_thread() is threading.main_thread()
inspect_ai/_util/trace.py CHANGED
@@ -33,6 +33,22 @@ def inspect_trace_file() -> Path:
33
33
  def trace_action(
34
34
  logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
35
35
  ) -> Generator[None, None, None]:
36
+ """Trace a long running or potentially unreliable action.
37
+
38
+ Trace actions for which you want to collect data on the resolution
39
+ (e.g. succeeded, cancelled, failed, timed out, etc.) and duration of.
40
+
41
+ Traces are written to the `TRACE` log level (which is just below
42
+ `HTTP` and `INFO`). List and read trace logs with `inspect trace list`
43
+ and related commands (see `inspect trace --help` for details).
44
+
45
+ Args:
46
+ logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
47
+ action (str): Name of action to trace (e.g. 'Model', 'Subprocess', etc.)
48
+ message (str): Message describing action (can be a format string w/ args or kwargs)
49
+ *args (Any): Positional arguments for `message` format string.
50
+ **kwargs (Any): Named args for `message` format string.
51
+ """
36
52
  trace_id = uuid()
37
53
  start_monotonic = time.monotonic()
38
54
  start_wall = time.time()
@@ -117,6 +133,19 @@ def trace_action(
117
133
  def trace_message(
118
134
  logger: Logger, category: str, message: str, *args: Any, **kwargs: Any
119
135
  ) -> None:
136
+ """Log a message using the TRACE log level.
137
+
138
+ The `TRACE` log level is just below `HTTP` and `INFO`. List and
139
+ read trace logs with `inspect trace list` and related commands
140
+ (see `inspect trace --help` for details).
141
+
142
+ Args:
143
+ logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
144
+ category (str): Category of trace message.
145
+ message (str): Trace message (can be a format string w/ args or kwargs)
146
+ *args (Any): Positional arguments for `message` format string.
147
+ **kwargs (Any): Named args for `message` format string.
148
+ """
120
149
  logger.log(TRACE, f"[{category}] {message}", *args, **kwargs)
121
150
 
122
151
 
@@ -250,9 +279,16 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
250
279
 
251
280
 
252
281
  def rotate_trace_files() -> None:
253
- rotate_files = list_trace_files()[10:]
254
- for file in rotate_files:
255
- file.file.unlink(missing_ok=True)
282
+ # if multiple inspect processes start up at once they
283
+ # will all be attempting to rotate at the same time,
284
+ # which can lead to FileNotFoundError -- ignore these
285
+ # errors if they occur
286
+ try:
287
+ rotate_files = list_trace_files()[10:]
288
+ for file in rotate_files:
289
+ file.file.unlink(missing_ok=True)
290
+ except FileNotFoundError:
291
+ pass
256
292
 
257
293
 
258
294
  def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:
@@ -1,4 +1,5 @@
1
1
  import html
2
+ import re
2
3
  from typing import Any
3
4
 
4
5
  from rich.align import AlignMethod
@@ -19,13 +20,43 @@ def transcript_code_theme() -> str:
19
20
  def transcript_markdown(content: str, *, escape: bool = False) -> Markdown:
20
21
  code_theme = transcript_code_theme()
21
22
  return Markdown(
22
- html.escape(content) if escape else content,
23
+ html_escape_markdown(content) if escape else content,
23
24
  code_theme=code_theme,
24
25
  inline_code_lexer="python",
25
26
  inline_code_theme=code_theme,
26
27
  )
27
28
 
28
29
 
30
+ def html_escape_markdown(content: str) -> str:
31
+ """Escape markdown lines that aren't in a code block."""
32
+ codeblock_pattern = re.compile("`{3,}")
33
+ current_codeblock = ""
34
+ escaped: list[str] = []
35
+ lines = content.splitlines()
36
+ for line in lines:
37
+ # look for matching end of codeblock
38
+ if current_codeblock:
39
+ if current_codeblock in line:
40
+ current_codeblock = ""
41
+ escaped.append(line)
42
+ continue
43
+
44
+ # look for beginning of codeblock
45
+ match = codeblock_pattern.search(line)
46
+ if match:
47
+ current_codeblock = match[0]
48
+ escaped.append(line)
49
+ continue
50
+
51
+ # escape if we are not in a codeblock
52
+ if current_codeblock:
53
+ escaped.append(line)
54
+ else:
55
+ escaped.append(html.escape(line, quote=False))
56
+
57
+ return "\n".join(escaped)
58
+
59
+
29
60
  def set_transcript_markdown_options(markdown: Markdown) -> None:
30
61
  code_theme = transcript_code_theme()
31
62
  markdown.code_theme = code_theme
@@ -89,12 +120,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
89
120
  return transcript_markdown("```python\n" + call + "\n```\n")
90
121
 
91
122
 
92
- DOUBLE_LINE = Box(
93
- " ══ \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n"
94
- )
123
+ DOUBLE_LINE = Box(" ══ \n \n \n \n \n \n \n \n")
95
124
 
96
- LINE = Box(" ── \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
125
+ LINE = Box(" ── \n \n \n \n \n \n \n \n")
97
126
 
98
- DOTTED = Box(" ·· \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
127
+ DOTTED = Box(" ·· \n \n \n \n \n \n \n \n")
99
128
 
100
- NOBORDER = Box(" \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n")
129
+ NOBORDER = Box(" \n \n \n \n \n \n \n \n")
@@ -0,0 +1,12 @@
1
+ // Do not remove this file even if the config is empty!
2
+ // VSCode's "Format Document" will respect this config and use the default
3
+ // settings, which is what we want. Without prettierrc, VSCode falls back to
4
+ // users settings, which could be different.
5
+
6
+ /**
7
+ * @see https://prettier.io/docs/en/configuration.html
8
+ * @type {import("prettier").Config}
9
+ */
10
+ const config = {};
11
+
12
+ export default config;