inspect-ai 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. inspect_ai/_eval/loader.py +1 -1
  2. inspect_ai/_eval/task/run.py +12 -6
  3. inspect_ai/_util/exception.py +4 -0
  4. inspect_ai/_util/hash.py +39 -0
  5. inspect_ai/_util/path.py +22 -0
  6. inspect_ai/_util/trace.py +1 -1
  7. inspect_ai/_util/working.py +4 -0
  8. inspect_ai/_view/www/dist/assets/index.css +9 -9
  9. inspect_ai/_view/www/dist/assets/index.js +117 -120
  10. inspect_ai/_view/www/package.json +1 -1
  11. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  12. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  13. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  14. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  15. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  16. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  17. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  18. inspect_ai/_view/www/src/app/types.ts +12 -2
  19. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  20. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  21. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  22. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  23. inspect_ai/_view/www/yarn.lock +9 -9
  24. inspect_ai/agent/_bridge/patch.py +1 -3
  25. inspect_ai/analysis/__init__.py +0 -0
  26. inspect_ai/analysis/beta/__init__.py +57 -0
  27. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  28. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  29. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  30. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  31. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  32. inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
  33. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  34. inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
  35. inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
  36. inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
  37. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  38. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  39. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  40. inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
  41. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  42. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  43. inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
  44. inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
  45. inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
  46. inspect_ai/analysis/beta/_dataframe/util.py +157 -0
  47. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  48. inspect_ai/log/_file.py +1 -1
  49. inspect_ai/log/_log.py +21 -1
  50. inspect_ai/model/_call_tools.py +2 -1
  51. inspect_ai/model/_model.py +6 -4
  52. inspect_ai/model/_openai_responses.py +17 -18
  53. inspect_ai/model/_providers/anthropic.py +30 -5
  54. inspect_ai/model/_providers/providers.py +1 -1
  55. inspect_ai/solver/_multiple_choice.py +4 -1
  56. inspect_ai/solver/_task_state.py +7 -3
  57. inspect_ai/tool/_mcp/_context.py +3 -5
  58. inspect_ai/tool/_mcp/server.py +1 -1
  59. inspect_ai/tool/_tools/_think.py +1 -1
  60. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  61. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  62. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  63. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  64. inspect_ai/util/_sandbox/events.py +3 -2
  65. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
  66. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +70 -43
  67. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
  68. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
  69. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
  70. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Literal, Protocol, runtime_checkable
2
+ from typing import Awaitable, Callable
3
3
 
4
4
  import anyio
5
5
  import httpx
@@ -16,8 +16,6 @@ from inspect_ai._util.error import PrerequisiteError
16
16
  from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
17
17
  from inspect_ai.util._concurrency import concurrency
18
18
 
19
- from .._tool import Tool, ToolResult, tool
20
-
21
19
  DEFAULT_RELEVANCE_PROMPT = """I am trying to answer the following question and need to find the most relevant information on the web. Please let me know if the following content is relevant to the question or not. You should just respond with "yes" or "no".
22
20
 
23
21
  Question: {question}
@@ -31,59 +29,35 @@ class SearchLink:
31
29
  self.snippet = snippet
32
30
 
33
31
 
34
- @runtime_checkable
35
- class SearchProvider(Protocol):
36
- async def __call__(self, query: str, start_idx: int) -> list[SearchLink]: ...
37
-
38
-
39
- @tool
40
- def web_search(
41
- provider: Literal["google"] = "google",
42
- num_results: int = 3,
43
- max_provider_calls: int = 3,
44
- max_connections: int = 10,
45
- model: str | None = None,
46
- ) -> Tool:
47
- """Web search tool.
48
-
49
- A tool that can be registered for use by models to search the web. Use
50
- the `use_tools()` solver to make the tool available (e.g. `use_tools(web_search())`))
51
-
52
- A web search is conducted using the specified provider, the results are parsed for relevance
53
- using the specified model, and the top 'num_results' relevant pages are returned.
54
-
55
- See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
56
-
57
- Args:
58
- provider: Search provider (defaults to "google", currently
59
- the only provider). Possible future providers include "brave" and "bing".
60
- num_results: Number of web search result pages to return to the model.
61
- max_provider_calls: Maximum number of search calls to make to the search provider.
62
- max_connections: Maximum number of concurrent connections to API
63
- endpoint of search provider.
64
- model: Model used to parse web pages for relevance.
32
+ def maybe_get_google_api_keys() -> tuple[str, str] | None:
33
+ """
34
+ Get Google API keys from environment variables.
65
35
 
66
36
  Returns:
67
- A tool that can be registered for use by models to search the web.
37
+ tuple: A tuple containing the Google API key and the Google CSE ID.
68
38
  """
69
- # get search client
70
- client = httpx.AsyncClient()
39
+ google_api_key = os.environ.get("GOOGLE_CSE_API_KEY", None)
40
+ google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
41
+ return (google_api_key, google_cse_id) if google_api_key and google_cse_id else None
71
42
 
72
- if provider == "google":
73
- search_provider = google_search_provider(client)
74
- else:
75
- raise ValueError(
76
- f"Provider {provider} not supported. Only 'google' is supported."
43
+
44
+ def google_search_provider(
45
+ num_results: int,
46
+ max_provider_calls: int,
47
+ max_connections: int,
48
+ model: str | None,
49
+ ) -> Callable[[str], Awaitable[str | None]]:
50
+ keys = maybe_get_google_api_keys()
51
+ if not keys:
52
+ raise PrerequisiteError(
53
+ "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
77
54
  )
55
+ google_api_key, google_cse_id = keys
78
56
 
79
- # resolve provider (only google for now)
80
- async def execute(query: str) -> ToolResult:
81
- """
82
- Use the web_search tool to perform keyword searches of the web.
57
+ # Create the client within the provider
58
+ client = httpx.AsyncClient()
83
59
 
84
- Args:
85
- query (str): Search query.
86
- """
60
+ async def search(query: str) -> str | None:
87
61
  # limit number of concurrent searches
88
62
  page_contents: list[str] = []
89
63
  urls: list[str] = []
@@ -92,8 +66,8 @@ def web_search(
92
66
 
93
67
  # Paginate through search results until we have successfully extracted num_results pages or we have reached max_provider_calls
94
68
  while len(page_contents) < num_results and search_calls < max_provider_calls:
95
- async with concurrency(f"{provider}_web_search", max_connections):
96
- links = await search_provider(query, start_idx=search_calls * 10)
69
+ async with concurrency("google_web_search", max_connections):
70
+ links = await _search(query, start_idx=search_calls * 10)
97
71
 
98
72
  async with anyio.create_task_group() as tg:
99
73
 
@@ -114,19 +88,39 @@ def web_search(
114
88
  search_calls += 1
115
89
 
116
90
  all_page_contents = "\n\n".join(page_contents)
117
- if all_page_contents == "":
118
- response: ToolResult = (
119
- "I'm sorry, I couldn't find any relevant information on the web."
120
- )
121
- else:
122
- response = (
123
- "Here are your web search results. Please read them carefully as they may be useful later! "
124
- + all_page_contents
125
- )
91
+ return None if all_page_contents == "" else all_page_contents
126
92
 
127
- return response
93
+ async def _search(query: str, start_idx: int) -> list[SearchLink]:
94
+ # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
95
+ search_params = {
96
+ "q": query,
97
+ "key": google_api_key,
98
+ "cx": google_cse_id,
99
+ "start": start_idx,
100
+ }
101
+ search_url = "https://www.googleapis.com/customsearch/v1?" + "&".join(
102
+ [f"{key}={value}" for key, value in search_params.items()]
103
+ )
128
104
 
129
- return execute
105
+ # retry up to 5 times over a period of up to 1 minute
106
+ @retry(
107
+ wait=wait_exponential_jitter(),
108
+ stop=stop_after_attempt(5) | stop_after_delay(60),
109
+ retry=retry_if_exception(httpx_should_retry),
110
+ before_sleep=log_httpx_retry_attempt(search_url),
111
+ )
112
+ async def execute_search() -> httpx.Response:
113
+ return await client.get(search_url)
114
+
115
+ result = await execute_search()
116
+ data = result.json()
117
+
118
+ if "items" in data:
119
+ return [SearchLink(item["link"], item["snippet"]) for item in data["items"]]
120
+ else:
121
+ return []
122
+
123
+ return search
130
124
 
131
125
 
132
126
  async def page_if_relevant(
@@ -183,44 +177,3 @@ async def page_if_relevant(
183
177
  return full_text
184
178
  else:
185
179
  return None
186
-
187
-
188
- def google_search_provider(client: httpx.AsyncClient) -> SearchProvider:
189
- google_api_key = os.environ.get("GOOGLE_CSE_API_KEY", None)
190
- google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
191
- if not google_api_key or not google_cse_id:
192
- raise PrerequisiteError(
193
- "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
194
- )
195
-
196
- async def search(query: str, start_idx: int) -> list[SearchLink]:
197
- # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
198
- search_params = {
199
- "q": query,
200
- "key": google_api_key,
201
- "cx": google_cse_id,
202
- "start": start_idx,
203
- }
204
- search_url = "https://www.googleapis.com/customsearch/v1?" + "&".join(
205
- [f"{key}={value}" for key, value in search_params.items()]
206
- )
207
-
208
- # retry up to 5 times over a period of up to 1 minute
209
- @retry(
210
- wait=wait_exponential_jitter(),
211
- stop=stop_after_attempt(5) | stop_after_delay(60),
212
- retry=retry_if_exception(httpx_should_retry),
213
- before_sleep=log_httpx_retry_attempt(search_url),
214
- )
215
- async def execute_search() -> httpx.Response:
216
- return await client.get(search_url)
217
-
218
- result = await execute_search()
219
- data = result.json()
220
-
221
- if "items" in data:
222
- return [SearchLink(item["link"], item["snippet"]) for item in data["items"]]
223
- else:
224
- return []
225
-
226
- return search
@@ -0,0 +1,77 @@
1
+ import os
2
+ from typing import Awaitable, Callable
3
+
4
+ import httpx
5
+ from pydantic import BaseModel, Field
6
+ from tenacity import (
7
+ retry,
8
+ retry_if_exception,
9
+ stop_after_attempt,
10
+ stop_after_delay,
11
+ wait_exponential_jitter,
12
+ )
13
+
14
+ from inspect_ai._util.error import PrerequisiteError
15
+ from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
16
+ from inspect_ai.util._concurrency import concurrency
17
+
18
+
19
+ class TavilySearchResult(BaseModel):
20
+ title: str
21
+ url: str
22
+ content: str
23
+ score: float
24
+
25
+
26
+ class TavilySearchResponse(BaseModel):
27
+ query: str
28
+ answer: str | None = Field(default=None)
29
+ images: list[object]
30
+ results: list[TavilySearchResult]
31
+ response_time: float
32
+
33
+
34
+ def tavily_search_provider(
35
+ num_results: int, max_connections: int
36
+ ) -> Callable[[str], Awaitable[str | None]]:
37
+ tavily_api_key = os.environ.get("TAVILY_API_KEY", None)
38
+ if not tavily_api_key:
39
+ raise PrerequisiteError(
40
+ "TAVILY_API_KEY not set in the environment. Please ensure ths variable is defined to use Tavily with the web_search tool.\n\nLearn more about the Tavily web search provider at https://inspect.aisi.org.uk/tools.html#tavily-provider"
41
+ )
42
+ if num_results > 20:
43
+ raise PrerequisiteError(
44
+ "The Tavily search provider is limited to 20 results per query."
45
+ )
46
+
47
+ # Create the client within the provider
48
+ client = httpx.AsyncClient(timeout=30)
49
+
50
+ async def search(query: str) -> str | None:
51
+ search_url = "https://api.tavily.com/search"
52
+ headers = {
53
+ "Authorization": f"Bearer {tavily_api_key}",
54
+ }
55
+ body = {
56
+ "query": query,
57
+ "max_results": 10, # num_results,
58
+ # "search_depth": "advanced",
59
+ "include_answer": "advanced",
60
+ }
61
+
62
+ # retry up to 5 times over a period of up to 1 minute
63
+ @retry(
64
+ wait=wait_exponential_jitter(),
65
+ stop=stop_after_attempt(5) | stop_after_delay(60),
66
+ retry=retry_if_exception(httpx_should_retry),
67
+ before_sleep=log_httpx_retry_attempt(search_url),
68
+ )
69
+ async def _search() -> httpx.Response:
70
+ response = await client.post(search_url, headers=headers, json=body)
71
+ response.raise_for_status()
72
+ return response
73
+
74
+ async with concurrency("tavily_web_search", max_connections):
75
+ return TavilySearchResponse.model_validate((await _search()).json()).answer
76
+
77
+ return search
@@ -0,0 +1,85 @@
1
+ from typing import Literal
2
+
3
+ from inspect_ai._util.deprecation import deprecation_warning
4
+
5
+ from ..._tool import Tool, ToolResult, tool
6
+ from ._google import google_search_provider, maybe_get_google_api_keys
7
+ from ._tavily import tavily_search_provider
8
+
9
+
10
+ @tool
11
+ def web_search(
12
+ provider: Literal["tavily", "google"] | None = None,
13
+ num_results: int = 3,
14
+ max_provider_calls: int = 3,
15
+ max_connections: int = 10,
16
+ model: str | None = None,
17
+ ) -> Tool:
18
+ """Web search tool.
19
+
20
+ A tool that can be registered for use by models to search the web. Use
21
+ the `use_tools()` solver to make the tool available (e.g.
22
+ `use_tools(web_search(provider="tavily"))`))
23
+
24
+ A web search is conducted using the specified provider.
25
+ - When using Tavily, all logic for relevance and summarization is handled by
26
+ the Tavily API.
27
+ - When using Google, the results are parsed for relevance using the specified
28
+ model, and the top 'num_results' relevant pages are returned.
29
+
30
+ See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
31
+
32
+ Args:
33
+ provider: Search provider to use:
34
+ - "tavily": Uses Tavily's Research API.
35
+ - "google": Uses Google Custom Search.
36
+ Note: The `| None` type is only for backwards compatibility. Passing
37
+ `None` is deprecated.
38
+ num_results: The number of search result pages used to provide information
39
+ back to the model.
40
+ max_provider_calls: Maximum number of search calls to make to the search
41
+ provider.
42
+ max_connections: Maximum number of concurrent connections to API endpoint
43
+ of search provider.
44
+ model: Model used to parse web pages for relevance - used only by the
45
+ `google` provider.
46
+
47
+ Returns:
48
+ A tool that can be registered for use by models to search the web.
49
+ """
50
+ if provider is None:
51
+ if maybe_get_google_api_keys():
52
+ deprecation_warning(
53
+ "The `google` `web_search` provider was inferred based on the presence of environment variables. Please specify the provider explicitly to avoid this warning."
54
+ )
55
+ provider = "google"
56
+ else:
57
+ raise ValueError(
58
+ "Omitting `provider` is no longer supported. Please specify the `web_search` provider explicitly to avoid this error."
59
+ )
60
+
61
+ search_provider = (
62
+ google_search_provider(num_results, max_provider_calls, max_connections, model)
63
+ if provider == "google"
64
+ else tavily_search_provider(num_results, max_connections)
65
+ )
66
+
67
+ async def execute(query: str) -> ToolResult:
68
+ """
69
+ Use the web_search tool to perform keyword searches of the web.
70
+
71
+ Args:
72
+ query (str): Search query.
73
+ """
74
+ search_result = await search_provider(query)
75
+
76
+ return (
77
+ (
78
+ "Here are your web search results. Please read them carefully as they may be useful later!\n"
79
+ + search_result
80
+ )
81
+ if search_result
82
+ else ("I'm sorry, I couldn't find any relevant information on the web.")
83
+ )
84
+
85
+ return execute
@@ -1,7 +1,7 @@
1
1
  import contextlib
2
2
  import shlex
3
3
  from datetime import datetime
4
- from typing import Iterator, Literal, Type, Union, overload
4
+ from typing import Any, Iterator, Literal, Type, Union, overload
5
5
 
6
6
  from pydantic import JsonValue
7
7
  from pydantic_core import to_jsonable_python
@@ -134,7 +134,8 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
134
134
 
135
135
  @override
136
136
  async def connection(self, *, user: str | None = None) -> SandboxConnection:
137
- return await self._sandbox.connection(user=user)
137
+ params: dict[str, Any] = {"user": user} if user is not None else {}
138
+ return await self._sandbox.connection(**params)
138
139
 
139
140
  @override
140
141
  def as_type(self, sandbox_cls: Type[ST]) -> ST:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.94
3
+ Version: 0.3.95
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -32,6 +32,8 @@ Requires-Dist: httpx
32
32
  Requires-Dist: ijson>=3.2.0
33
33
  Requires-Dist: jsonlines>=3.0.0
34
34
  Requires-Dist: jsonpatch>=1.32
35
+ Requires-Dist: jsonpath-ng>=1.7.0
36
+ Requires-Dist: jsonref>=1.1.0
35
37
  Requires-Dist: jsonschema>3.1.1
36
38
  Requires-Dist: mmh3>3.1.0
37
39
  Requires-Dist: nest_asyncio
@@ -59,6 +61,7 @@ Requires-Dist: google-genai; extra == "dev"
59
61
  Requires-Dist: griffe; extra == "dev"
60
62
  Requires-Dist: groq; extra == "dev"
61
63
  Requires-Dist: ipython; extra == "dev"
64
+ Requires-Dist: jsonpath-ng; extra == "dev"
62
65
  Requires-Dist: markdown; extra == "dev"
63
66
  Requires-Dist: mcp; extra == "dev"
64
67
  Requires-Dist: mistralai; extra == "dev"
@@ -66,9 +69,11 @@ Requires-Dist: moto[server]; extra == "dev"
66
69
  Requires-Dist: mypy; extra == "dev"
67
70
  Requires-Dist: nbformat; extra == "dev"
68
71
  Requires-Dist: openai; extra == "dev"
72
+ Requires-Dist: pandas>=2.0.0; extra == "dev"
69
73
  Requires-Dist: panflute; extra == "dev"
70
74
  Requires-Dist: pip; extra == "dev"
71
75
  Requires-Dist: pre-commit; extra == "dev"
76
+ Requires-Dist: pyarrow>=10.0.1; extra == "dev"
72
77
  Requires-Dist: pylint; extra == "dev"
73
78
  Requires-Dist: pytest; extra == "dev"
74
79
  Requires-Dist: pytest-asyncio; extra == "dev"
@@ -78,6 +83,8 @@ Requires-Dist: pytest-xdist; extra == "dev"
78
83
  Requires-Dist: ruff==0.9.6; extra == "dev"
79
84
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
80
85
  Requires-Dist: trio; extra == "dev"
86
+ Requires-Dist: pandas-stubs; extra == "dev"
87
+ Requires-Dist: pyarrow-stubs; extra == "dev"
81
88
  Requires-Dist: types-Markdown; extra == "dev"
82
89
  Requires-Dist: types-PyYAML; extra == "dev"
83
90
  Requires-Dist: types-beautifulsoup4; extra == "dev"