inspect-ai 0.3.108__py3-none-any.whl → 0.3.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. inspect_ai/_eval/task/log.py +1 -1
  2. inspect_ai/_eval/task/run.py +1 -1
  3. inspect_ai/_util/dateutil.py +40 -0
  4. inspect_ai/_view/schema.py +11 -0
  5. inspect_ai/_view/www/CLAUDE.md +1 -1
  6. inspect_ai/_view/www/dist/assets/index.css +2068 -1796
  7. inspect_ai/_view/www/dist/assets/index.js +7951 -3643
  8. inspect_ai/_view/www/package.json +3 -2
  9. inspect_ai/_view/www/src/@types/log.d.ts +5 -5
  10. inspect_ai/_view/www/src/app/App.css +71 -4
  11. inspect_ai/_view/www/src/app/App.tsx +7 -0
  12. inspect_ai/_view/www/src/app/appearance/icons.ts +18 -2
  13. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +7 -9
  14. inspect_ai/_view/www/src/app/log-list/LogItem.ts +18 -0
  15. inspect_ai/_view/www/src/app/log-list/LogListFooter.module.css +55 -0
  16. inspect_ai/_view/www/src/app/log-list/LogListFooter.tsx +67 -0
  17. inspect_ai/_view/www/src/app/log-list/LogPager.module.css +29 -0
  18. inspect_ai/_view/www/src/app/log-list/LogPager.tsx +134 -0
  19. inspect_ai/_view/www/src/app/log-list/LogsFilterInput.module.css +5 -0
  20. inspect_ai/_view/www/src/app/log-list/LogsFilterInput.tsx +31 -0
  21. inspect_ai/_view/www/src/app/log-list/LogsPanel.module.css +12 -0
  22. inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx +178 -0
  23. inspect_ai/_view/www/src/app/log-list/grid/LogListGrid.module.css +115 -0
  24. inspect_ai/_view/www/src/app/log-list/grid/LogListGrid.tsx +304 -0
  25. inspect_ai/_view/www/src/app/log-list/grid/columns/CompletedDate.module.css +6 -0
  26. inspect_ai/_view/www/src/app/log-list/grid/columns/CompletedDate.tsx +64 -0
  27. inspect_ai/_view/www/src/app/log-list/grid/columns/EmptyCell.module.css +3 -0
  28. inspect_ai/_view/www/src/app/log-list/grid/columns/EmptyCell.tsx +7 -0
  29. inspect_ai/_view/www/src/app/log-list/grid/columns/FileName.module.css +20 -0
  30. inspect_ai/_view/www/src/app/log-list/grid/columns/FileName.tsx +52 -0
  31. inspect_ai/_view/www/src/app/log-list/grid/columns/Icon.module.css +11 -0
  32. inspect_ai/_view/www/src/app/log-list/grid/columns/Icon.tsx +35 -0
  33. inspect_ai/_view/www/src/app/log-list/grid/columns/Model.module.css +6 -0
  34. inspect_ai/_view/www/src/app/log-list/grid/columns/Model.tsx +34 -0
  35. inspect_ai/_view/www/src/app/log-list/grid/columns/Score.module.css +6 -0
  36. inspect_ai/_view/www/src/app/log-list/grid/columns/Score.tsx +61 -0
  37. inspect_ai/_view/www/src/app/log-list/grid/columns/Status.module.css +15 -0
  38. inspect_ai/_view/www/src/app/log-list/grid/columns/Status.tsx +95 -0
  39. inspect_ai/_view/www/src/app/log-list/grid/columns/Task.module.css +20 -0
  40. inspect_ai/_view/www/src/app/log-list/grid/columns/Task.tsx +50 -0
  41. inspect_ai/_view/www/src/app/log-list/grid/columns/columns.ts +27 -0
  42. inspect_ai/_view/www/src/app/log-view/LogView.tsx +2 -5
  43. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +4 -30
  44. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +5 -30
  45. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +4 -7
  46. inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/PrimaryBar.module.css +2 -0
  47. inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/PrimaryBar.tsx +3 -31
  48. inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ResultsPanel.tsx +7 -57
  49. inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ScoreGrid.tsx +2 -2
  50. inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/SecondaryBar.tsx +7 -1
  51. inspect_ai/_view/www/src/app/log-view/{navbar/Navbar.tsx → title-view/TitleView.tsx} +3 -6
  52. inspect_ai/_view/www/src/app/navbar/Navbar.module.css +57 -0
  53. inspect_ai/_view/www/src/app/navbar/Navbar.tsx +117 -0
  54. inspect_ai/_view/www/src/app/navbar/useBreadcrumbTruncation.ts +128 -0
  55. inspect_ai/_view/www/src/app/plan/DatasetDetailView.tsx +3 -3
  56. inspect_ai/_view/www/src/app/plan/DetailStep.tsx +6 -6
  57. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -0
  58. inspect_ai/_view/www/src/app/plan/ScorerDetailView.tsx +1 -1
  59. inspect_ai/_view/www/src/app/routing/AppRouter.tsx +28 -4
  60. inspect_ai/_view/www/src/app/routing/RouteDispatcher.tsx +28 -0
  61. inspect_ai/_view/www/src/app/routing/sampleNavigation.ts +76 -7
  62. inspect_ai/_view/www/src/app/routing/url.ts +193 -20
  63. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +3 -17
  64. inspect_ai/_view/www/src/app/samples/descriptor/score/ScoreDescriptor.tsx +1 -1
  65. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +2 -2
  66. inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +2 -2
  67. inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +5 -0
  68. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +26 -10
  69. inspect_ai/_view/www/src/app/types.ts +21 -1
  70. inspect_ai/_view/www/src/client/api/api-http.ts +2 -1
  71. inspect_ai/_view/www/src/client/api/api-shared.ts +0 -32
  72. inspect_ai/_view/www/src/client/api/client-api.ts +1 -1
  73. inspect_ai/_view/www/src/client/remote/remoteLogFile.ts +38 -6
  74. inspect_ai/_view/www/src/components/TextInput.module.css +45 -0
  75. inspect_ai/_view/www/src/components/TextInput.tsx +52 -0
  76. inspect_ai/_view/www/src/constants.ts +18 -0
  77. inspect_ai/_view/www/src/img/inspect-16.svg +10 -0
  78. inspect_ai/_view/www/src/img/inspect-back.svg +5 -0
  79. inspect_ai/_view/www/src/img/inspect-file.svg +26 -0
  80. inspect_ai/_view/www/src/img/inspect-forward.svg +7 -0
  81. inspect_ai/_view/www/src/img/inspect-home.svg +18 -0
  82. inspect_ai/_view/www/src/scoring/metrics.ts +75 -0
  83. inspect_ai/_view/www/src/scoring/scores.ts +19 -0
  84. inspect_ai/_view/www/src/scoring/types.ts +11 -0
  85. inspect_ai/_view/www/src/state/appSlice.ts +27 -7
  86. inspect_ai/_view/www/src/state/clientEvents.ts +73 -0
  87. inspect_ai/_view/www/src/state/clientEventsService.ts +105 -0
  88. inspect_ai/_view/www/src/state/hooks.ts +118 -1
  89. inspect_ai/_view/www/src/state/log.ts +19 -0
  90. inspect_ai/_view/www/src/state/logPolling.ts +3 -1
  91. inspect_ai/_view/www/src/state/logSlice.ts +9 -0
  92. inspect_ai/_view/www/src/state/logsSlice.ts +157 -15
  93. inspect_ai/_view/www/src/state/samplePolling.ts +4 -2
  94. inspect_ai/_view/www/src/tests/utils/path.test.ts +3 -3
  95. inspect_ai/_view/www/src/utils/evallog.ts +31 -0
  96. inspect_ai/_view/www/src/utils/path.ts +28 -0
  97. inspect_ai/_view/www/src/utils/uri.ts +49 -0
  98. inspect_ai/_view/www/yarn.lock +54 -17
  99. inspect_ai/analysis/beta/_dataframe/util.py +106 -10
  100. inspect_ai/log/_recorders/buffer/database.py +55 -16
  101. inspect_ai/model/_model.py +1 -1
  102. inspect_ai/model/_providers/providers.py +2 -2
  103. inspect_ai/model/_providers/vertex.py +3 -0
  104. inspect_ai/tool/_mcp/_mcp.py +6 -1
  105. inspect_ai/tool/_mcp/sampling.py +8 -1
  106. inspect_ai/tool/_tools/_bash_session.py +3 -6
  107. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -8
  108. inspect_ai/util/_anyio.py +12 -3
  109. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/METADATA +2 -2
  110. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/RECORD +124 -94
  111. inspect_ai/_util/datetime.py +0 -10
  112. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +0 -35
  113. inspect_ai/_view/www/src/app/content/MetaDataView.tsx +0 -101
  114. inspect_ai/_view/www/src/app/log-view/utils.ts +0 -34
  115. inspect_ai/_view/www/src/app/sidebar/EvalStatus.module.css +0 -15
  116. inspect_ai/_view/www/src/app/sidebar/EvalStatus.tsx +0 -72
  117. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +0 -16
  118. inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +0 -70
  119. inspect_ai/_view/www/src/app/sidebar/Sidebar.module.css +0 -77
  120. inspect_ai/_view/www/src/app/sidebar/Sidebar.tsx +0 -119
  121. inspect_ai/_view/www/src/app/sidebar/SidebarLogEntry.module.css +0 -29
  122. inspect_ai/_view/www/src/app/sidebar/SidebarLogEntry.tsx +0 -96
  123. inspect_ai/_view/www/src/app/sidebar/SidebarScoreView.module.css +0 -23
  124. inspect_ai/_view/www/src/app/sidebar/SidebarScoreView.tsx +0 -44
  125. inspect_ai/_view/www/src/app/sidebar/SidebarScoresView.module.css +0 -35
  126. inspect_ai/_view/www/src/app/sidebar/SidebarScoresView.tsx +0 -63
  127. inspect_ai/_view/www/src/state/logsPolling.ts +0 -118
  128. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ModelRolesView.module.css +0 -0
  129. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ModelRolesView.tsx +0 -0
  130. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ResultsPanel.module.css +0 -0
  131. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/RunningStatusPanel.module.css +0 -0
  132. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/RunningStatusPanel.tsx +0 -0
  133. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ScoreGrid.module.css +0 -0
  134. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/SecondaryBar.module.css +0 -0
  135. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/StatusPanel.module.css +0 -0
  136. /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/StatusPanel.tsx +0 -0
  137. /inspect_ai/_view/www/src/app/log-view/{navbar/Navbar.module.css → title-view/TitleView.module.css} +0 -0
  138. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/WHEEL +0 -0
  139. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/entry_points.txt +0 -0
  140. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/licenses/LICENSE +0 -0
  141. {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.109.dist-info}/top_level.txt +0 -0
@@ -139,18 +139,13 @@ def add_unreferenced_columns(
139
139
 
140
140
 
141
141
  def records_to_pandas(records: list[dict[str, ColumnType]]) -> "pd.DataFrame":
142
+ import pandas as pd
142
143
  import pyarrow as pa
143
144
 
144
- # create arrow table
145
- records = normalize_records(records)
146
- table = pa.Table.from_pylist(records)
147
-
148
- # convert arrow to pandas
149
- df = table.to_pandas(types_mapper=arrow_types_mapper)
150
-
151
- # swap numpy-backed nullable columns for arrow-backed equivalents
152
- # df = df.convert_dtypes(dtype_backend="pyarrow")
153
- return df
145
+ # arrow backed df w/ our types mapper
146
+ df = pd.DataFrame(records)
147
+ table = pa.Table.from_pandas(df)
148
+ return table.to_pandas(types_mapper=arrow_types_mapper)
154
149
 
155
150
 
156
151
  def arrow_types_mapper(arrow_type: pa.DataType) -> pd.ArrowDtype:
@@ -160,3 +155,104 @@ def arrow_types_mapper(arrow_type: pa.DataType) -> pd.ArrowDtype:
160
155
  if pa.types.is_null(arrow_type):
161
156
  arrow_type = pa.string()
162
157
  return pd.ArrowDtype(arrow_type)
158
+
159
+
160
+ # sample_id string[pyarrow]
161
+ # eval_id string[pyarrow]
162
+ # id string[pyarrow]
163
+ # epoch int64[pyarrow]
164
+ # input string[pyarrow]
165
+ # target string[pyarrow]
166
+ # metadata_challenge_address string[pyarrow]
167
+ # metadata_challenge_type string[pyarrow]
168
+ # metadata_color string[pyarrow]
169
+ # metadata_cookie string[pyarrow]
170
+ # metadata_foo string[pyarrow]
171
+ # metadata_get_flag_cmd string[pyarrow]
172
+ # metadata_get_flag_service string[pyarrow]
173
+ # metadata_label_confidence double[pyarrow]
174
+ # metadata_long string[pyarrow]
175
+ # metadata_objective_prompt string[pyarrow]
176
+ # metadata_prompt string[pyarrow]
177
+ # metadata_variant string[pyarrow]
178
+ # score_another_rand_score double[pyarrow]
179
+ # score_check_flag string[pyarrow]
180
+ # score_choice string[pyarrow]
181
+ # score_compare_quantities double[pyarrow]
182
+ # score_complex_scorer string[pyarrow]
183
+ # score_exact string[pyarrow]
184
+ # score_foo double[pyarrow]
185
+ # score_generating_scorer double[pyarrow]
186
+ # score_includes string[pyarrow]
187
+ # score_letter_count string[pyarrow]
188
+ # score_match string[pyarrow]
189
+ # score_model_graded_fact string[pyarrow]
190
+ # score_model_graded_qa string[pyarrow]
191
+ # score_nested_dict_scorer string[pyarrow]
192
+ # score_nested_list_scorer string[pyarrow]
193
+ # score_rand_score double[pyarrow]
194
+ # score_score_color string[pyarrow]
195
+ # score_score_table string[pyarrow]
196
+ # score_simple_score string[pyarrow]
197
+ # score_simple_score1 string[pyarrow]
198
+ # score_simple_score2 string[pyarrow]
199
+ # score_slow_scorer double[pyarrow]
200
+ # score_token_consuming_scorer double[pyarrow]
201
+ # score_wildcard_scorer string[pyarrow]
202
+ # model_usage string[pyarrow]
203
+ # total_time double[pyarrow]
204
+ # working_time double[pyarrow]
205
+ # error string[pyarrow]
206
+ # limit string[pyarrow]
207
+ # retries int64[pyarrow]
208
+ # dtype: object
209
+
210
+ # sample_id string[pyarrow]
211
+ # eval_id string[pyarrow]
212
+ # id string[pyarrow]
213
+ # epoch int64[pyarrow]
214
+ # input string[pyarrow]
215
+ # target string[pyarrow]
216
+ # metadata_challenge_address string[pyarrow]
217
+ # metadata_challenge_type string[pyarrow]
218
+ # metadata_color string[pyarrow]
219
+ # metadata_cookie string[pyarrow]
220
+ # metadata_foo string[pyarrow]
221
+ # metadata_get_flag_cmd string[pyarrow]
222
+ # metadata_get_flag_service string[pyarrow]
223
+ # metadata_label_confidence double[pyarrow]
224
+ # metadata_long string[pyarrow]
225
+ # metadata_objective_prompt string[pyarrow]
226
+ # metadata_prompt string[pyarrow]
227
+ # metadata_variant string[pyarrow]
228
+ # score_another_rand_score int64[pyarrow]
229
+ # score_check_flag string[pyarrow]
230
+ # score_choice string[pyarrow]
231
+ # score_compare_quantities double[pyarrow]
232
+ # score_complex_scorer string[pyarrow]
233
+ # score_exact string[pyarrow]
234
+ # score_foo double[pyarrow]
235
+ # score_generating_scorer int64[pyarrow]
236
+ # score_includes string[pyarrow]
237
+ # score_letter_count string[pyarrow]
238
+ # score_match string[pyarrow]
239
+ # score_model_graded_fact string[pyarrow]
240
+ # score_model_graded_qa string[pyarrow]
241
+ # score_nested_dict_scorer string[pyarrow]
242
+ # score_nested_list_scorer string[pyarrow]
243
+ # score_rand_score int64[pyarrow]
244
+ # score_score_color string[pyarrow]
245
+ # score_score_table string[pyarrow]
246
+ # score_simple_score string[pyarrow]
247
+ # score_simple_score1 string[pyarrow]
248
+ # score_simple_score2 string[pyarrow]
249
+ # score_slow_scorer int64[pyarrow]
250
+ # score_token_consuming_scorer int64[pyarrow]
251
+ # score_wildcard_scorer string[pyarrow]
252
+ # model_usage string[pyarrow]
253
+ # total_time double[pyarrow]
254
+ # working_time double[pyarrow]
255
+ # error string[pyarrow]
256
+ # limit string[pyarrow]
257
+ # retries int64[pyarrow]
258
+ # dtype: object
@@ -1,3 +1,4 @@
1
+ import datetime
1
2
  import hashlib
2
3
  import json
3
4
  import os
@@ -15,6 +16,7 @@ from typing_extensions import override
15
16
 
16
17
  from inspect_ai._display.core.display import TaskDisplayMetric
17
18
  from inspect_ai._util.appdirs import inspect_data_dir
19
+ from inspect_ai._util.dateutil import is_file_older_than
18
20
  from inspect_ai._util.file import basename, dirname, filesystem
19
21
  from inspect_ai._util.json import to_json_str_safe
20
22
  from inspect_ai._util.trace import trace_action
@@ -301,17 +303,44 @@ class SampleBufferDatabase(SampleBuffer):
301
303
  @contextmanager
302
304
  def _get_connection(self, *, write: bool = False) -> Iterator[Connection]:
303
305
  """Get a database connection."""
304
- conn = sqlite3.connect(self.db_path, timeout=10)
305
- conn.row_factory = sqlite3.Row # Enable row factory for named columns
306
- try:
307
- # Enable foreign key constraints
308
- conn.execute("PRAGMA foreign_keys = ON")
306
+ max_retries = 5
307
+ retry_delay = 0.1
308
+
309
+ conn: Connection | None = None
310
+ last_error: Exception | None = None
309
311
 
310
- # concurrency setup
311
- conn.execute("PRAGMA journal_mode=MEMORY")
312
- conn.execute("PRAGMA busy_timeout=10000")
313
- conn.execute("PRAGMA synchronous=OFF")
312
+ for attempt in range(max_retries):
313
+ try:
314
+ conn = sqlite3.connect(self.db_path, timeout=30)
315
+ conn.row_factory = sqlite3.Row # enable row factory for named columns
316
+
317
+ # Enable foreign key constraints
318
+ conn.execute("PRAGMA foreign_keys = ON")
319
+
320
+ # concurrency setup
321
+ conn.execute("PRAGMA busy_timeout=30000")
322
+ conn.execute("PRAGMA synchronous=OFF")
323
+ conn.execute("PRAGMA cache_size=-64000")
324
+ conn.execute("PRAGMA temp_store=MEMORY")
325
+
326
+ break
327
+
328
+ except sqlite3.OperationalError as e:
329
+ last_error = e
330
+ if "locked" in str(e) and attempt < max_retries - 1:
331
+ if conn:
332
+ conn.close()
333
+ time.sleep(retry_delay * (2**attempt))
334
+ continue
335
+ raise
336
+
337
+ # ensure we have a connection
338
+ if conn is None:
339
+ raise sqlite3.OperationalError(
340
+ f"Failed to establish connection after {max_retries} attempts"
341
+ ) from last_error
314
342
 
343
+ try:
315
344
  # do work
316
345
  yield conn
317
346
 
@@ -663,13 +692,23 @@ def maximum_ids(
663
692
 
664
693
 
665
694
  def cleanup_sample_buffer_databases(db_dir: Path | None = None) -> None:
666
- db_dir = resolve_db_dir(db_dir)
667
- for db in db_dir.glob("*.*.db"):
668
- _, pid_str, _ = db.name.rsplit(".", 2)
669
- if pid_str.isdigit():
670
- pid = int(pid_str)
671
- if not psutil.pid_exists(pid):
672
- cleanup_sample_buffer_db(db)
695
+ try:
696
+ db_dir = resolve_db_dir(db_dir)
697
+ for db in db_dir.glob("*.*.db"):
698
+ # this is a failsafe cleanup method for buffer db's leaked during
699
+ # abnormal terminations. therefore, it's not critical that we clean
700
+ # it up immediately. it's also possible that users are _sharing_
701
+ # their inspect_data_dir across multiple pid namespaces (e.g. in an
702
+ # effort to share their cache), in which case one eval could remove the
703
+ # db of another running eval if we don't put in a delay.
704
+ if is_file_older_than(db, datetime.timedelta(days=3), default=False):
705
+ _, pid_str, _ = db.name.rsplit(".", 2)
706
+ if pid_str.isdigit():
707
+ pid = int(pid_str)
708
+ if not psutil.pid_exists(pid):
709
+ cleanup_sample_buffer_db(db)
710
+ except Exception as ex:
711
+ logger.warning(f"Error cleaning up sample buffer databases at {db_dir}: {ex}")
673
712
 
674
713
 
675
714
  def cleanup_sample_buffer_db(path: Path) -> None:
@@ -560,7 +560,7 @@ class Model:
560
560
  input = collapse_consecutive_assistant_messages(input)
561
561
 
562
562
  # retry for transient http errors:
563
- # - no default timeout or max_retries (try forever)
563
+ # - use config.max_retries and config.timeout if specified, otherwise retry forever
564
564
  # - exponential backoff starting at 3 seconds (will wait 25 minutes
565
565
  # on the 10th retry, then will wait no longer than 30 minutes on
566
566
  # subsequent retries)
@@ -96,9 +96,9 @@ def vertex() -> type[ModelAPI]:
96
96
  verify_required_version(FEATURE, PACKAGE, MIN_VERSION)
97
97
 
98
98
  # in the clear
99
- from .vertex import VertexAPI
99
+ from .vertex import VertexAPI # type: ignore
100
100
 
101
- return VertexAPI
101
+ return VertexAPI # type: ignore
102
102
 
103
103
 
104
104
  @modelapi(name="google")
@@ -1,3 +1,5 @@
1
+ # type: ignore
2
+
1
3
  import functools
2
4
  import json
3
5
  from copy import copy
@@ -340,6 +342,7 @@ async def content_part(content: Content | str) -> Part:
340
342
  if isinstance(content, ContentAudio):
341
343
  file = content.audio
342
344
  elif isinstance(content, ContentData):
345
+ file = ""
343
346
  assert False, "Vertex provider should never encounter ContentData"
344
347
  else:
345
348
  # it's ContentVideo
@@ -15,6 +15,7 @@ from mcp.types import (
15
15
  AudioContent,
16
16
  EmbeddedResource,
17
17
  ImageContent,
18
+ ResourceLink,
18
19
  TextContent,
19
20
  TextResourceContents,
20
21
  )
@@ -283,7 +284,9 @@ def create_server_sandbox(
283
284
 
284
285
 
285
286
  def tool_result_as_text(
286
- content: list[TextContent | ImageContent | AudioContent | EmbeddedResource],
287
+ content: list[
288
+ TextContent | ImageContent | AudioContent | ResourceLink | EmbeddedResource
289
+ ],
287
290
  ) -> str:
288
291
  content_list: list[str] = []
289
292
  for c in content:
@@ -293,6 +296,8 @@ def tool_result_as_text(
293
296
  content_list.append("(base64 encoded image omitted)")
294
297
  elif isinstance(c, AudioContent):
295
298
  content_list.append("(base64 encoded audio omitted)")
299
+ elif isinstance(c, ResourceLink):
300
+ content_list.append(f"{c.description} ({c.uri})")
296
301
  elif isinstance(c.resource, TextResourceContents):
297
302
  content_list.append(c.resource.text)
298
303
 
@@ -10,6 +10,7 @@ from mcp.types import (
10
10
  EmbeddedResource,
11
11
  ErrorData,
12
12
  ImageContent,
13
+ ResourceLink,
13
14
  TextContent,
14
15
  TextResourceContents,
15
16
  )
@@ -94,7 +95,11 @@ async def sampling_fn(
94
95
 
95
96
 
96
97
  def as_inspect_content(
97
- content: TextContent | ImageContent | AudioContent | EmbeddedResource,
98
+ content: TextContent
99
+ | ImageContent
100
+ | AudioContent
101
+ | ResourceLink
102
+ | EmbeddedResource,
98
103
  ) -> Content:
99
104
  if isinstance(content, TextContent):
100
105
  return ContentText(text=content.text)
@@ -107,6 +112,8 @@ def as_inspect_content(
107
112
  audio=f"data:audio/{content.mimeType};base64,{content.data}",
108
113
  format=_get_audio_format(content.mimeType),
109
114
  )
115
+ elif isinstance(content, ResourceLink):
116
+ return ContentText(text=f"{content.description} ({content.uri})")
110
117
  elif isinstance(content.resource, TextResourceContents):
111
118
  return ContentText(text=content.resource.text)
112
119
  else:
@@ -3,7 +3,6 @@ from typing import Annotated, Literal
3
3
 
4
4
  from pydantic import BaseModel, Discriminator, Field, RootModel
5
5
  from semver import Version
6
- from shortuuid import uuid
7
6
 
8
7
  from inspect_ai._util.error import PrerequisiteError
9
8
  from inspect_ai.tool import ToolResult
@@ -82,7 +81,7 @@ def bash_session(
82
81
  *,
83
82
  timeout: int | None = None, # default is max_wait + 5 seconds
84
83
  wait_for_output: int | None = None, # default is 30 seconds
85
- instance: str | None = uuid(),
84
+ instance: str | None = None,
86
85
  ) -> Tool:
87
86
  """Interactive bash shell session tool.
88
87
 
@@ -91,10 +90,8 @@ def bash_session(
91
90
  which could be a command followed by a newline character or any other input
92
91
  text such as the response to a password prompt.
93
92
 
94
- By default, a separate bash process is created within the sandbox for each
95
- call to `bash_session()`. You can modify this behavior by passing
96
- `instance=None` (which will result in a single bash process for the entire
97
- sample) or use other `instance` values that implement another scheme).
93
+ To create a separate bash process for each
94
+ call to `bash_session()`, pass a unique value for `instance`.
98
95
 
99
96
  See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-bash-session>.
100
97
 
@@ -1,7 +1,6 @@
1
1
  import re
2
2
 
3
3
  from pydantic import BaseModel, Field
4
- from shortuuid import uuid
5
4
 
6
5
  from inspect_ai._util.content import ContentText
7
6
  from inspect_ai._util.error import PrerequisiteError
@@ -32,15 +31,11 @@ class CrawlerResult(BaseModel):
32
31
  error: str | None = None
33
32
 
34
33
 
35
- def web_browser(
36
- *, interactive: bool = True, instance: str | None = uuid()
37
- ) -> list[Tool]:
34
+ def web_browser(*, interactive: bool = True, instance: str | None = None) -> list[Tool]:
38
35
  """Tools used for web browser navigation.
39
36
 
40
- By default, a separate web browser process is created within the sandbox for each
41
- call to `web_browser()`. You can modify this behavior by passing `instance=None`
42
- (which will result in a single web browser for the entire sample) or use other
43
- `instance` values that implement another scheme).
37
+ To create a separate web browser process for each
38
+ call to `web_browser()`, pass a unique value for `instance`.
44
39
 
45
40
  See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
46
41
 
inspect_ai/util/_anyio.py CHANGED
@@ -10,11 +10,20 @@ if sys.version_info < (3, 11):
10
10
 
11
11
 
12
12
  def inner_exception(exc: Exception) -> Exception:
13
- return _flatten_exception(exc)[0]
13
+ return _flatten_exception(exc, set())[0]
14
14
 
15
15
 
16
- def _flatten_exception(exc: Exception) -> list[Exception]:
16
+ def _flatten_exception(exc: Exception, seen: set[int] | None = None) -> list[Exception]:
17
17
  """Recursively flatten an exception to get all related (__context__) and contained (ExceptionGroup) exceptions."""
18
+ if seen is None:
19
+ seen = set()
20
+
21
+ # Prevent infinite recursion by tracking seen exceptions by their id
22
+ exc_id = id(exc)
23
+ if exc_id in seen:
24
+ return []
25
+ seen.add(exc_id)
26
+
18
27
  context_to_follow = (
19
28
  [exc.__context__]
20
29
  # conceptually, if __cause__ is present, it means that this exception
@@ -36,7 +45,7 @@ def _flatten_exception(exc: Exception) -> list[Exception]:
36
45
  other_exceptions = [
37
46
  flattened_e
38
47
  for e in set(itertools.chain(context_to_follow, children_to_follow))
39
- for flattened_e in _flatten_exception(e)
48
+ for flattened_e in _flatten_exception(e, seen)
40
49
  ]
41
50
 
42
51
  return maybe_this_exception + other_exceptions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.108
3
+ Version: 0.3.109
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
63
63
  Requires-Dist: ipython; extra == "dev"
64
64
  Requires-Dist: jsonpath-ng; extra == "dev"
65
65
  Requires-Dist: markdown; extra == "dev"
66
- Requires-Dist: mcp>=1.9.4; extra == "dev"
66
+ Requires-Dist: mcp>=1.10.0; extra == "dev"
67
67
  Requires-Dist: mistralai; extra == "dev"
68
68
  Requires-Dist: moto[server]; extra == "dev"
69
69
  Requires-Dist: mypy>=1.16.0; extra == "dev"