inspect-ai 0.3.108__py3-none-any.whl → 0.3.110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/task/log.py +1 -1
- inspect_ai/_eval/task/run.py +7 -3
- inspect_ai/_util/dateutil.py +40 -0
- inspect_ai/_view/schema.py +11 -0
- inspect_ai/_view/www/CLAUDE.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +2068 -1796
- inspect_ai/_view/www/dist/assets/index.js +7951 -3643
- inspect_ai/_view/www/package.json +3 -2
- inspect_ai/_view/www/src/@types/log.d.ts +5 -5
- inspect_ai/_view/www/src/app/App.css +71 -4
- inspect_ai/_view/www/src/app/App.tsx +7 -0
- inspect_ai/_view/www/src/app/appearance/icons.ts +18 -2
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +7 -9
- inspect_ai/_view/www/src/app/log-list/LogItem.ts +18 -0
- inspect_ai/_view/www/src/app/log-list/LogListFooter.module.css +55 -0
- inspect_ai/_view/www/src/app/log-list/LogListFooter.tsx +67 -0
- inspect_ai/_view/www/src/app/log-list/LogPager.module.css +29 -0
- inspect_ai/_view/www/src/app/log-list/LogPager.tsx +134 -0
- inspect_ai/_view/www/src/app/log-list/LogsFilterInput.module.css +5 -0
- inspect_ai/_view/www/src/app/log-list/LogsFilterInput.tsx +31 -0
- inspect_ai/_view/www/src/app/log-list/LogsPanel.module.css +12 -0
- inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx +178 -0
- inspect_ai/_view/www/src/app/log-list/grid/LogListGrid.module.css +115 -0
- inspect_ai/_view/www/src/app/log-list/grid/LogListGrid.tsx +304 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/CompletedDate.module.css +6 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/CompletedDate.tsx +64 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/EmptyCell.module.css +3 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/EmptyCell.tsx +7 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/FileName.module.css +20 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/FileName.tsx +52 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Icon.module.css +11 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Icon.tsx +35 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Model.module.css +6 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Model.tsx +34 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Score.module.css +6 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Score.tsx +61 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Status.module.css +15 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Status.tsx +95 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Task.module.css +20 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/Task.tsx +50 -0
- inspect_ai/_view/www/src/app/log-list/grid/columns/columns.ts +27 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +2 -5
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +4 -30
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +5 -30
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +4 -7
- inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/PrimaryBar.module.css +2 -0
- inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/PrimaryBar.tsx +3 -31
- inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ResultsPanel.tsx +7 -57
- inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ScoreGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/SecondaryBar.tsx +7 -1
- inspect_ai/_view/www/src/app/log-view/{navbar/Navbar.tsx → title-view/TitleView.tsx} +3 -6
- inspect_ai/_view/www/src/app/navbar/Navbar.module.css +57 -0
- inspect_ai/_view/www/src/app/navbar/Navbar.tsx +117 -0
- inspect_ai/_view/www/src/app/navbar/useBreadcrumbTruncation.ts +128 -0
- inspect_ai/_view/www/src/app/plan/DatasetDetailView.tsx +3 -3
- inspect_ai/_view/www/src/app/plan/DetailStep.tsx +6 -6
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -0
- inspect_ai/_view/www/src/app/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +28 -4
- inspect_ai/_view/www/src/app/routing/RouteDispatcher.tsx +28 -0
- inspect_ai/_view/www/src/app/routing/sampleNavigation.ts +76 -7
- inspect_ai/_view/www/src/app/routing/url.ts +193 -20
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +3 -17
- inspect_ai/_view/www/src/app/samples/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +5 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +26 -10
- inspect_ai/_view/www/src/app/types.ts +21 -1
- inspect_ai/_view/www/src/client/api/api-http.ts +2 -1
- inspect_ai/_view/www/src/client/api/api-shared.ts +0 -32
- inspect_ai/_view/www/src/client/api/client-api.ts +1 -1
- inspect_ai/_view/www/src/client/remote/remoteLogFile.ts +38 -6
- inspect_ai/_view/www/src/components/TextInput.module.css +45 -0
- inspect_ai/_view/www/src/components/TextInput.tsx +52 -0
- inspect_ai/_view/www/src/constants.ts +18 -0
- inspect_ai/_view/www/src/img/inspect-16.svg +10 -0
- inspect_ai/_view/www/src/img/inspect-back.svg +5 -0
- inspect_ai/_view/www/src/img/inspect-file.svg +26 -0
- inspect_ai/_view/www/src/img/inspect-forward.svg +7 -0
- inspect_ai/_view/www/src/img/inspect-home.svg +18 -0
- inspect_ai/_view/www/src/scoring/metrics.ts +75 -0
- inspect_ai/_view/www/src/scoring/scores.ts +19 -0
- inspect_ai/_view/www/src/scoring/types.ts +11 -0
- inspect_ai/_view/www/src/state/appSlice.ts +27 -7
- inspect_ai/_view/www/src/state/clientEvents.ts +73 -0
- inspect_ai/_view/www/src/state/clientEventsService.ts +105 -0
- inspect_ai/_view/www/src/state/hooks.ts +118 -1
- inspect_ai/_view/www/src/state/log.ts +19 -0
- inspect_ai/_view/www/src/state/logPolling.ts +3 -1
- inspect_ai/_view/www/src/state/logSlice.ts +9 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +157 -15
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -2
- inspect_ai/_view/www/src/tests/utils/path.test.ts +3 -3
- inspect_ai/_view/www/src/utils/evallog.ts +31 -0
- inspect_ai/_view/www/src/utils/path.ts +28 -0
- inspect_ai/_view/www/src/utils/uri.ts +49 -0
- inspect_ai/_view/www/yarn.lock +54 -17
- inspect_ai/analysis/beta/_dataframe/util.py +106 -10
- inspect_ai/log/_recorders/buffer/database.py +55 -16
- inspect_ai/model/_model.py +1 -1
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/tool/_mcp/_mcp.py +6 -1
- inspect_ai/tool/_mcp/sampling.py +8 -1
- inspect_ai/tool/_tools/_bash_session.py +3 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -8
- inspect_ai/util/_anyio.py +12 -3
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/RECORD +124 -94
- inspect_ai/_util/datetime.py +0 -10
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +0 -35
- inspect_ai/_view/www/src/app/content/MetaDataView.tsx +0 -101
- inspect_ai/_view/www/src/app/log-view/utils.ts +0 -34
- inspect_ai/_view/www/src/app/sidebar/EvalStatus.module.css +0 -15
- inspect_ai/_view/www/src/app/sidebar/EvalStatus.tsx +0 -72
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +0 -16
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +0 -70
- inspect_ai/_view/www/src/app/sidebar/Sidebar.module.css +0 -77
- inspect_ai/_view/www/src/app/sidebar/Sidebar.tsx +0 -119
- inspect_ai/_view/www/src/app/sidebar/SidebarLogEntry.module.css +0 -29
- inspect_ai/_view/www/src/app/sidebar/SidebarLogEntry.tsx +0 -96
- inspect_ai/_view/www/src/app/sidebar/SidebarScoreView.module.css +0 -23
- inspect_ai/_view/www/src/app/sidebar/SidebarScoreView.tsx +0 -44
- inspect_ai/_view/www/src/app/sidebar/SidebarScoresView.module.css +0 -35
- inspect_ai/_view/www/src/app/sidebar/SidebarScoresView.tsx +0 -63
- inspect_ai/_view/www/src/state/logsPolling.ts +0 -118
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ModelRolesView.tsx +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/RunningStatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar → title-view}/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/app/log-view/{navbar/Navbar.module.css → title-view/TitleView.module.css} +0 -0
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.108.dist-info → inspect_ai-0.3.110.dist-info}/top_level.txt +0 -0
@@ -139,18 +139,13 @@ def add_unreferenced_columns(
|
|
139
139
|
|
140
140
|
|
141
141
|
def records_to_pandas(records: list[dict[str, ColumnType]]) -> "pd.DataFrame":
|
142
|
+
import pandas as pd
|
142
143
|
import pyarrow as pa
|
143
144
|
|
144
|
-
#
|
145
|
-
|
146
|
-
table = pa.Table.
|
147
|
-
|
148
|
-
# convert arrow to pandas
|
149
|
-
df = table.to_pandas(types_mapper=arrow_types_mapper)
|
150
|
-
|
151
|
-
# swap numpy-backed nullable columns for arrow-backed equivalents
|
152
|
-
# df = df.convert_dtypes(dtype_backend="pyarrow")
|
153
|
-
return df
|
145
|
+
# arrow backed df w/ our types mapper
|
146
|
+
df = pd.DataFrame(records)
|
147
|
+
table = pa.Table.from_pandas(df)
|
148
|
+
return table.to_pandas(types_mapper=arrow_types_mapper)
|
154
149
|
|
155
150
|
|
156
151
|
def arrow_types_mapper(arrow_type: pa.DataType) -> pd.ArrowDtype:
|
@@ -160,3 +155,104 @@ def arrow_types_mapper(arrow_type: pa.DataType) -> pd.ArrowDtype:
|
|
160
155
|
if pa.types.is_null(arrow_type):
|
161
156
|
arrow_type = pa.string()
|
162
157
|
return pd.ArrowDtype(arrow_type)
|
158
|
+
|
159
|
+
|
160
|
+
# sample_id string[pyarrow]
|
161
|
+
# eval_id string[pyarrow]
|
162
|
+
# id string[pyarrow]
|
163
|
+
# epoch int64[pyarrow]
|
164
|
+
# input string[pyarrow]
|
165
|
+
# target string[pyarrow]
|
166
|
+
# metadata_challenge_address string[pyarrow]
|
167
|
+
# metadata_challenge_type string[pyarrow]
|
168
|
+
# metadata_color string[pyarrow]
|
169
|
+
# metadata_cookie string[pyarrow]
|
170
|
+
# metadata_foo string[pyarrow]
|
171
|
+
# metadata_get_flag_cmd string[pyarrow]
|
172
|
+
# metadata_get_flag_service string[pyarrow]
|
173
|
+
# metadata_label_confidence double[pyarrow]
|
174
|
+
# metadata_long string[pyarrow]
|
175
|
+
# metadata_objective_prompt string[pyarrow]
|
176
|
+
# metadata_prompt string[pyarrow]
|
177
|
+
# metadata_variant string[pyarrow]
|
178
|
+
# score_another_rand_score double[pyarrow]
|
179
|
+
# score_check_flag string[pyarrow]
|
180
|
+
# score_choice string[pyarrow]
|
181
|
+
# score_compare_quantities double[pyarrow]
|
182
|
+
# score_complex_scorer string[pyarrow]
|
183
|
+
# score_exact string[pyarrow]
|
184
|
+
# score_foo double[pyarrow]
|
185
|
+
# score_generating_scorer double[pyarrow]
|
186
|
+
# score_includes string[pyarrow]
|
187
|
+
# score_letter_count string[pyarrow]
|
188
|
+
# score_match string[pyarrow]
|
189
|
+
# score_model_graded_fact string[pyarrow]
|
190
|
+
# score_model_graded_qa string[pyarrow]
|
191
|
+
# score_nested_dict_scorer string[pyarrow]
|
192
|
+
# score_nested_list_scorer string[pyarrow]
|
193
|
+
# score_rand_score double[pyarrow]
|
194
|
+
# score_score_color string[pyarrow]
|
195
|
+
# score_score_table string[pyarrow]
|
196
|
+
# score_simple_score string[pyarrow]
|
197
|
+
# score_simple_score1 string[pyarrow]
|
198
|
+
# score_simple_score2 string[pyarrow]
|
199
|
+
# score_slow_scorer double[pyarrow]
|
200
|
+
# score_token_consuming_scorer double[pyarrow]
|
201
|
+
# score_wildcard_scorer string[pyarrow]
|
202
|
+
# model_usage string[pyarrow]
|
203
|
+
# total_time double[pyarrow]
|
204
|
+
# working_time double[pyarrow]
|
205
|
+
# error string[pyarrow]
|
206
|
+
# limit string[pyarrow]
|
207
|
+
# retries int64[pyarrow]
|
208
|
+
# dtype: object
|
209
|
+
|
210
|
+
# sample_id string[pyarrow]
|
211
|
+
# eval_id string[pyarrow]
|
212
|
+
# id string[pyarrow]
|
213
|
+
# epoch int64[pyarrow]
|
214
|
+
# input string[pyarrow]
|
215
|
+
# target string[pyarrow]
|
216
|
+
# metadata_challenge_address string[pyarrow]
|
217
|
+
# metadata_challenge_type string[pyarrow]
|
218
|
+
# metadata_color string[pyarrow]
|
219
|
+
# metadata_cookie string[pyarrow]
|
220
|
+
# metadata_foo string[pyarrow]
|
221
|
+
# metadata_get_flag_cmd string[pyarrow]
|
222
|
+
# metadata_get_flag_service string[pyarrow]
|
223
|
+
# metadata_label_confidence double[pyarrow]
|
224
|
+
# metadata_long string[pyarrow]
|
225
|
+
# metadata_objective_prompt string[pyarrow]
|
226
|
+
# metadata_prompt string[pyarrow]
|
227
|
+
# metadata_variant string[pyarrow]
|
228
|
+
# score_another_rand_score int64[pyarrow]
|
229
|
+
# score_check_flag string[pyarrow]
|
230
|
+
# score_choice string[pyarrow]
|
231
|
+
# score_compare_quantities double[pyarrow]
|
232
|
+
# score_complex_scorer string[pyarrow]
|
233
|
+
# score_exact string[pyarrow]
|
234
|
+
# score_foo double[pyarrow]
|
235
|
+
# score_generating_scorer int64[pyarrow]
|
236
|
+
# score_includes string[pyarrow]
|
237
|
+
# score_letter_count string[pyarrow]
|
238
|
+
# score_match string[pyarrow]
|
239
|
+
# score_model_graded_fact string[pyarrow]
|
240
|
+
# score_model_graded_qa string[pyarrow]
|
241
|
+
# score_nested_dict_scorer string[pyarrow]
|
242
|
+
# score_nested_list_scorer string[pyarrow]
|
243
|
+
# score_rand_score int64[pyarrow]
|
244
|
+
# score_score_color string[pyarrow]
|
245
|
+
# score_score_table string[pyarrow]
|
246
|
+
# score_simple_score string[pyarrow]
|
247
|
+
# score_simple_score1 string[pyarrow]
|
248
|
+
# score_simple_score2 string[pyarrow]
|
249
|
+
# score_slow_scorer int64[pyarrow]
|
250
|
+
# score_token_consuming_scorer int64[pyarrow]
|
251
|
+
# score_wildcard_scorer string[pyarrow]
|
252
|
+
# model_usage string[pyarrow]
|
253
|
+
# total_time double[pyarrow]
|
254
|
+
# working_time double[pyarrow]
|
255
|
+
# error string[pyarrow]
|
256
|
+
# limit string[pyarrow]
|
257
|
+
# retries int64[pyarrow]
|
258
|
+
# dtype: object
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import datetime
|
1
2
|
import hashlib
|
2
3
|
import json
|
3
4
|
import os
|
@@ -15,6 +16,7 @@ from typing_extensions import override
|
|
15
16
|
|
16
17
|
from inspect_ai._display.core.display import TaskDisplayMetric
|
17
18
|
from inspect_ai._util.appdirs import inspect_data_dir
|
19
|
+
from inspect_ai._util.dateutil import is_file_older_than
|
18
20
|
from inspect_ai._util.file import basename, dirname, filesystem
|
19
21
|
from inspect_ai._util.json import to_json_str_safe
|
20
22
|
from inspect_ai._util.trace import trace_action
|
@@ -301,17 +303,44 @@ class SampleBufferDatabase(SampleBuffer):
|
|
301
303
|
@contextmanager
|
302
304
|
def _get_connection(self, *, write: bool = False) -> Iterator[Connection]:
|
303
305
|
"""Get a database connection."""
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
306
|
+
max_retries = 5
|
307
|
+
retry_delay = 0.1
|
308
|
+
|
309
|
+
conn: Connection | None = None
|
310
|
+
last_error: Exception | None = None
|
309
311
|
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
312
|
+
for attempt in range(max_retries):
|
313
|
+
try:
|
314
|
+
conn = sqlite3.connect(self.db_path, timeout=30)
|
315
|
+
conn.row_factory = sqlite3.Row # enable row factory for named columns
|
316
|
+
|
317
|
+
# Enable foreign key constraints
|
318
|
+
conn.execute("PRAGMA foreign_keys = ON")
|
319
|
+
|
320
|
+
# concurrency setup
|
321
|
+
conn.execute("PRAGMA busy_timeout=30000")
|
322
|
+
conn.execute("PRAGMA synchronous=OFF")
|
323
|
+
conn.execute("PRAGMA cache_size=-64000")
|
324
|
+
conn.execute("PRAGMA temp_store=MEMORY")
|
325
|
+
|
326
|
+
break
|
327
|
+
|
328
|
+
except sqlite3.OperationalError as e:
|
329
|
+
last_error = e
|
330
|
+
if "locked" in str(e) and attempt < max_retries - 1:
|
331
|
+
if conn:
|
332
|
+
conn.close()
|
333
|
+
time.sleep(retry_delay * (2**attempt))
|
334
|
+
continue
|
335
|
+
raise
|
336
|
+
|
337
|
+
# ensure we have a connection
|
338
|
+
if conn is None:
|
339
|
+
raise sqlite3.OperationalError(
|
340
|
+
f"Failed to establish connection after {max_retries} attempts"
|
341
|
+
) from last_error
|
314
342
|
|
343
|
+
try:
|
315
344
|
# do work
|
316
345
|
yield conn
|
317
346
|
|
@@ -663,13 +692,23 @@ def maximum_ids(
|
|
663
692
|
|
664
693
|
|
665
694
|
def cleanup_sample_buffer_databases(db_dir: Path | None = None) -> None:
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
695
|
+
try:
|
696
|
+
db_dir = resolve_db_dir(db_dir)
|
697
|
+
for db in db_dir.glob("*.*.db"):
|
698
|
+
# this is a failsafe cleanup method for buffer db's leaked during
|
699
|
+
# abnormal terminations. therefore, it's not critical that we clean
|
700
|
+
# it up immediately. it's also possible that users are _sharing_
|
701
|
+
# their inspect_data_dir across multiple pid namespaces (e.g. in an
|
702
|
+
# effort to share their cache), in which case one eval could remove the db of
|
703
|
+
# another running eval if we don't put in a delay.
|
704
|
+
if is_file_older_than(db, datetime.timedelta(days=3), default=False):
|
705
|
+
_, pid_str, _ = db.name.rsplit(".", 2)
|
706
|
+
if pid_str.isdigit():
|
707
|
+
pid = int(pid_str)
|
708
|
+
if not psutil.pid_exists(pid):
|
709
|
+
cleanup_sample_buffer_db(db)
|
710
|
+
except Exception as ex:
|
711
|
+
logger.warning(f"Error cleaning up sample buffer databases at {db_dir}: {ex}")
|
673
712
|
|
674
713
|
|
675
714
|
def cleanup_sample_buffer_db(path: Path) -> None:
|
inspect_ai/model/_model.py
CHANGED
@@ -560,7 +560,7 @@ class Model:
|
|
560
560
|
input = collapse_consecutive_assistant_messages(input)
|
561
561
|
|
562
562
|
# retry for transient http errors:
|
563
|
-
# -
|
563
|
+
# - use config.max_retries and config.timeout if specified, otherwise retry forever
|
564
564
|
# - exponential backoff starting at 3 seconds (will wait 25 minutes
|
565
565
|
# on the 10th retry, then will wait no longer than 30 minutes on
|
566
566
|
# subsequent retries)
|
@@ -96,9 +96,9 @@ def vertex() -> type[ModelAPI]:
|
|
96
96
|
verify_required_version(FEATURE, PACKAGE, MIN_VERSION)
|
97
97
|
|
98
98
|
# in the clear
|
99
|
-
from .vertex import VertexAPI
|
99
|
+
from .vertex import VertexAPI # type: ignore
|
100
100
|
|
101
|
-
return VertexAPI
|
101
|
+
return VertexAPI # type: ignore
|
102
102
|
|
103
103
|
|
104
104
|
@modelapi(name="google")
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# type: ignore
|
2
|
+
|
1
3
|
import functools
|
2
4
|
import json
|
3
5
|
from copy import copy
|
@@ -340,6 +342,7 @@ async def content_part(content: Content | str) -> Part:
|
|
340
342
|
if isinstance(content, ContentAudio):
|
341
343
|
file = content.audio
|
342
344
|
elif isinstance(content, ContentData):
|
345
|
+
file = ""
|
343
346
|
assert False, "Vertex provider should never encounter ContentData"
|
344
347
|
else:
|
345
348
|
# it's ContentVideo
|
inspect_ai/tool/_mcp/_mcp.py
CHANGED
@@ -15,6 +15,7 @@ from mcp.types import (
|
|
15
15
|
AudioContent,
|
16
16
|
EmbeddedResource,
|
17
17
|
ImageContent,
|
18
|
+
ResourceLink,
|
18
19
|
TextContent,
|
19
20
|
TextResourceContents,
|
20
21
|
)
|
@@ -283,7 +284,9 @@ def create_server_sandbox(
|
|
283
284
|
|
284
285
|
|
285
286
|
def tool_result_as_text(
|
286
|
-
content: list[
|
287
|
+
content: list[
|
288
|
+
TextContent | ImageContent | AudioContent | ResourceLink | EmbeddedResource
|
289
|
+
],
|
287
290
|
) -> str:
|
288
291
|
content_list: list[str] = []
|
289
292
|
for c in content:
|
@@ -293,6 +296,8 @@ def tool_result_as_text(
|
|
293
296
|
content_list.append("(base64 encoded image omitted)")
|
294
297
|
elif isinstance(c, AudioContent):
|
295
298
|
content_list.append("(base64 encoded audio omitted)")
|
299
|
+
elif isinstance(c, ResourceLink):
|
300
|
+
content_list.append(f"{c.description} ({c.uri})")
|
296
301
|
elif isinstance(c.resource, TextResourceContents):
|
297
302
|
content_list.append(c.resource.text)
|
298
303
|
|
inspect_ai/tool/_mcp/sampling.py
CHANGED
@@ -10,6 +10,7 @@ from mcp.types import (
|
|
10
10
|
EmbeddedResource,
|
11
11
|
ErrorData,
|
12
12
|
ImageContent,
|
13
|
+
ResourceLink,
|
13
14
|
TextContent,
|
14
15
|
TextResourceContents,
|
15
16
|
)
|
@@ -94,7 +95,11 @@ async def sampling_fn(
|
|
94
95
|
|
95
96
|
|
96
97
|
def as_inspect_content(
|
97
|
-
content: TextContent
|
98
|
+
content: TextContent
|
99
|
+
| ImageContent
|
100
|
+
| AudioContent
|
101
|
+
| ResourceLink
|
102
|
+
| EmbeddedResource,
|
98
103
|
) -> Content:
|
99
104
|
if isinstance(content, TextContent):
|
100
105
|
return ContentText(text=content.text)
|
@@ -107,6 +112,8 @@ def as_inspect_content(
|
|
107
112
|
audio=f"data:audio/{content.mimeType};base64,{content.data}",
|
108
113
|
format=_get_audio_format(content.mimeType),
|
109
114
|
)
|
115
|
+
elif isinstance(content, ResourceLink):
|
116
|
+
return ContentText(text=f"{content.description} ({content.uri})")
|
110
117
|
elif isinstance(content.resource, TextResourceContents):
|
111
118
|
return ContentText(text=content.resource.text)
|
112
119
|
else:
|
@@ -3,7 +3,6 @@ from typing import Annotated, Literal
|
|
3
3
|
|
4
4
|
from pydantic import BaseModel, Discriminator, Field, RootModel
|
5
5
|
from semver import Version
|
6
|
-
from shortuuid import uuid
|
7
6
|
|
8
7
|
from inspect_ai._util.error import PrerequisiteError
|
9
8
|
from inspect_ai.tool import ToolResult
|
@@ -82,7 +81,7 @@ def bash_session(
|
|
82
81
|
*,
|
83
82
|
timeout: int | None = None, # default is max_wait + 5 seconds
|
84
83
|
wait_for_output: int | None = None, # default is 30 seconds
|
85
|
-
instance: str | None = uuid(),
|
84
|
+
instance: str | None = None,
|
86
85
|
) -> Tool:
|
87
86
|
"""Interactive bash shell session tool.
|
88
87
|
|
@@ -91,10 +90,8 @@ def bash_session(
|
|
91
90
|
which could be a command followed by a newline character or any other input
|
92
91
|
text such as the response to a password prompt.
|
93
92
|
|
94
|
-
|
95
|
-
call to `bash_session()
|
96
|
-
`instance=None` (which will result in a single bash process for the entire
|
97
|
-
sample) or use other `instance` values that implement another scheme).
|
93
|
+
To create a separate bash process for each
|
94
|
+
call to `bash_session()`, pass a unique value for `instance`.
|
98
95
|
|
99
96
|
See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-bash-session>.
|
100
97
|
|
@@ -1,7 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
|
3
3
|
from pydantic import BaseModel, Field
|
4
|
-
from shortuuid import uuid
|
5
4
|
|
6
5
|
from inspect_ai._util.content import ContentText
|
7
6
|
from inspect_ai._util.error import PrerequisiteError
|
@@ -32,15 +31,11 @@ class CrawlerResult(BaseModel):
|
|
32
31
|
error: str | None = None
|
33
32
|
|
34
33
|
|
35
|
-
def web_browser(
|
36
|
-
*, interactive: bool = True, instance: str | None = uuid()
|
37
|
-
) -> list[Tool]:
|
34
|
+
def web_browser(*, interactive: bool = True, instance: str | None = None) -> list[Tool]:
|
38
35
|
"""Tools used for web browser navigation.
|
39
36
|
|
40
|
-
|
41
|
-
call to `web_browser()
|
42
|
-
(which will result in a single web browser for the entire sample) or use other
|
43
|
-
`instance` values that implement another scheme).
|
37
|
+
To create a separate web browser process for each
|
38
|
+
call to `web_browser()`, pass a unique value for `instance`.
|
44
39
|
|
45
40
|
See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
|
46
41
|
|
inspect_ai/util/_anyio.py
CHANGED
@@ -10,11 +10,20 @@ if sys.version_info < (3, 11):
|
|
10
10
|
|
11
11
|
|
12
12
|
def inner_exception(exc: Exception) -> Exception:
|
13
|
-
return _flatten_exception(exc)[0]
|
13
|
+
return _flatten_exception(exc, set())[0]
|
14
14
|
|
15
15
|
|
16
|
-
def _flatten_exception(exc: Exception) -> list[Exception]:
|
16
|
+
def _flatten_exception(exc: Exception, seen: set[int] | None = None) -> list[Exception]:
|
17
17
|
"""Recursively flatten an exception to get all related (__context__) and contained (ExceptionGroup) exceptions."""
|
18
|
+
if seen is None:
|
19
|
+
seen = set()
|
20
|
+
|
21
|
+
# Prevent infinite recursion by tracking seen exceptions by their id
|
22
|
+
exc_id = id(exc)
|
23
|
+
if exc_id in seen:
|
24
|
+
return []
|
25
|
+
seen.add(exc_id)
|
26
|
+
|
18
27
|
context_to_follow = (
|
19
28
|
[exc.__context__]
|
20
29
|
# conceptually, if __cause__ is present, it means that this exception
|
@@ -36,7 +45,7 @@ def _flatten_exception(exc: Exception) -> list[Exception]:
|
|
36
45
|
other_exceptions = [
|
37
46
|
flattened_e
|
38
47
|
for e in set(itertools.chain(context_to_follow, children_to_follow))
|
39
|
-
for flattened_e in _flatten_exception(e)
|
48
|
+
for flattened_e in _flatten_exception(e, seen)
|
40
49
|
]
|
41
50
|
|
42
51
|
return maybe_this_exception + other_exceptions
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.108
|
3
|
+
Version: 0.3.110
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
|
|
63
63
|
Requires-Dist: ipython; extra == "dev"
|
64
64
|
Requires-Dist: jsonpath-ng; extra == "dev"
|
65
65
|
Requires-Dist: markdown; extra == "dev"
|
66
|
-
Requires-Dist: mcp>=1.
|
66
|
+
Requires-Dist: mcp>=1.10.0; extra == "dev"
|
67
67
|
Requires-Dist: mistralai; extra == "dev"
|
68
68
|
Requires-Dist: moto[server]; extra == "dev"
|
69
69
|
Requires-Dist: mypy>=1.16.0; extra == "dev"
|