PyPI - inspect-ai - Versions diffs - 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl - Mend

inspect-ai 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

inspect_ai/_eval/loader.py +1 -1
inspect_ai/_eval/task/run.py +12 -6
inspect_ai/_util/exception.py +4 -0
inspect_ai/_util/hash.py +39 -0
inspect_ai/_util/path.py +22 -0
inspect_ai/_util/trace.py +1 -1
inspect_ai/_util/working.py +4 -0
inspect_ai/_view/www/dist/assets/index.css +9 -9
inspect_ai/_view/www/dist/assets/index.js +117 -120
inspect_ai/_view/www/package.json +1 -1
inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
inspect_ai/_view/www/src/app/types.ts +12 -2
inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
inspect_ai/_view/www/src/state/hooks.ts +19 -3
inspect_ai/_view/www/src/state/logSlice.ts +23 -5
inspect_ai/_view/www/yarn.lock +9 -9
inspect_ai/agent/_bridge/patch.py +1 -3
inspect_ai/analysis/__init__.py +0 -0
inspect_ai/analysis/beta/__init__.py +57 -0
inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
inspect_ai/analysis/beta/_dataframe/record.py +377 -0
inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
inspect_ai/analysis/beta/_dataframe/util.py +157 -0
inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
inspect_ai/log/_file.py +1 -1
inspect_ai/log/_log.py +21 -1
inspect_ai/model/_call_tools.py +2 -1
inspect_ai/model/_model.py +6 -4
inspect_ai/model/_openai_responses.py +17 -18
inspect_ai/model/_providers/anthropic.py +30 -5
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/solver/_multiple_choice.py +4 -1
inspect_ai/solver/_task_state.py +7 -3
inspect_ai/tool/_mcp/_context.py +3 -5
inspect_ai/tool/_mcp/server.py +1 -1
inspect_ai/tool/_tools/_think.py +1 -1
inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
inspect_ai/util/_sandbox/events.py +3 -2
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +70 -43
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} RENAMED Viewed

@@ -1,5 +1,5 @@
 import os
-from typing import Literal, Protocol, runtime_checkable
+from typing import Awaitable, Callable
 import anyio
 import httpx
@@ -16,8 +16,6 @@ from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
 from inspect_ai.util._concurrency import concurrency
-from .._tool import Tool, ToolResult, tool
 DEFAULT_RELEVANCE_PROMPT = """I am trying to answer the following question and need to find the most relevant information on the web. Please let me know if the following content is relevant to the question or not. You should just respond with "yes" or "no".
 Question: {question}
@@ -31,59 +29,35 @@ class SearchLink:
         self.snippet = snippet
-@runtime_checkable
-class SearchProvider(Protocol):
-    async def __call__(self, query: str, start_idx: int) -> list[SearchLink]: ...
-@tool
-def web_search(
-    provider: Literal["google"] = "google",
-    num_results: int = 3,
-    max_provider_calls: int = 3,
-    max_connections: int = 10,
-    model: str | None = None,
-) -> Tool:
-    """Web search tool.
-    A tool that can be registered for use by models to search the web. Use
-    the `use_tools()` solver to make the tool available (e.g. `use_tools(web_search())`))
-    A web search is conducted using the specified provider, the results are parsed for relevance
-    using the specified model, and the top 'num_results' relevant pages are returned.
-    See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
-    Args:
-      provider: Search provider (defaults to "google", currently
-        the only provider). Possible future providers include "brave" and "bing".
-      num_results: Number of web search result pages to return to the model.
-      max_provider_calls: Maximum number of search calls to make to the search provider.
-      max_connections: Maximum number of concurrent connections to API
-        endpoint of search provider.
-      model: Model used to parse web pages for relevance.
+def maybe_get_google_api_keys() -> tuple[str, str] | None:
+    """
+    Get Google API keys from environment variables.
     Returns:
-       A tool that can be registered for use by models to search the web.
+        tuple: A tuple containing the Google API key and the Google CSE ID.
     """
-    # get search client
-    client = httpx.AsyncClient()
+    google_api_key = os.environ.get("GOOGLE_CSE_API_KEY", None)
+    google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
+    return (google_api_key, google_cse_id) if google_api_key and google_cse_id else None
-    if provider == "google":
-        search_provider = google_search_provider(client)
-    else:
-        raise ValueError(
-            f"Provider {provider} not supported. Only 'google' is supported."
+def google_search_provider(
+    num_results: int,
+    max_provider_calls: int,
+    max_connections: int,
+    model: str | None,
+) -> Callable[[str], Awaitable[str | None]]:
+    keys = maybe_get_google_api_keys()
+    if not keys:
+        raise PrerequisiteError(
+            "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
         )
+    google_api_key, google_cse_id = keys
-    # resolve provider (only google for now)
-    async def execute(query: str) -> ToolResult:
-        """
-        Use the web_search tool to perform keyword searches of the web.
+    # Create the client within the provider
+    client = httpx.AsyncClient()
-        Args:
-            query (str): Search query.
-        """
+    async def search(query: str) -> str | None:
         # limit number of concurrent searches
         page_contents: list[str] = []
         urls: list[str] = []
@@ -92,8 +66,8 @@ def web_search(
         # Paginate through search results until we have successfully extracted num_results pages or we have reached max_provider_calls
         while len(page_contents) < num_results and search_calls < max_provider_calls:
-            async with concurrency(f"{provider}_web_search", max_connections):
-                links = await search_provider(query, start_idx=search_calls * 10)
+            async with concurrency("google_web_search", max_connections):
+                links = await _search(query, start_idx=search_calls * 10)
             async with anyio.create_task_group() as tg:
@@ -114,19 +88,39 @@ def web_search(
             search_calls += 1
         all_page_contents = "\n\n".join(page_contents)
-        if all_page_contents == "":
-            response: ToolResult = (
-                "I'm sorry, I couldn't find any relevant information on the web."
-            )
-        else:
-            response = (
-                "Here are your web search results. Please read them carefully as they may be useful later! "
-                + all_page_contents
-            )
+        return None if all_page_contents == "" else all_page_contents
-        return response
+    async def _search(query: str, start_idx: int) -> list[SearchLink]:
+        # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
+        search_params = {
+            "q": query,
+            "key": google_api_key,
+            "cx": google_cse_id,
+            "start": start_idx,
+        }
+        search_url = "https://www.googleapis.com/customsearch/v1?" + "&".join(
+            [f"{key}={value}" for key, value in search_params.items()]
+        )
-    return execute
+        # retry up to 5 times over a period of up to 1 minute
+        @retry(
+            wait=wait_exponential_jitter(),
+            stop=stop_after_attempt(5) | stop_after_delay(60),
+            retry=retry_if_exception(httpx_should_retry),
+            before_sleep=log_httpx_retry_attempt(search_url),
+        )
+        async def execute_search() -> httpx.Response:
+            return await client.get(search_url)
+        result = await execute_search()
+        data = result.json()
+        if "items" in data:
+            return [SearchLink(item["link"], item["snippet"]) for item in data["items"]]
+        else:
+            return []
+    return search
 async def page_if_relevant(
@@ -183,44 +177,3 @@ async def page_if_relevant(
         return full_text
     else:
         return None
-def google_search_provider(client: httpx.AsyncClient) -> SearchProvider:
-    google_api_key = os.environ.get("GOOGLE_CSE_API_KEY", None)
-    google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
-    if not google_api_key or not google_cse_id:
-        raise PrerequisiteError(
-            "GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
-        )
-    async def search(query: str, start_idx: int) -> list[SearchLink]:
-        # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
-        search_params = {
-            "q": query,
-            "key": google_api_key,
-            "cx": google_cse_id,
-            "start": start_idx,
-        }
-        search_url = "https://www.googleapis.com/customsearch/v1?" + "&".join(
-            [f"{key}={value}" for key, value in search_params.items()]
-        )
-        # retry up to 5 times over a period of up to 1 minute
-        @retry(
-            wait=wait_exponential_jitter(),
-            stop=stop_after_attempt(5) | stop_after_delay(60),
-            retry=retry_if_exception(httpx_should_retry),
-            before_sleep=log_httpx_retry_attempt(search_url),
-        )
-        async def execute_search() -> httpx.Response:
-            return await client.get(search_url)
-        result = await execute_search()
-        data = result.json()
-        if "items" in data:
-            return [SearchLink(item["link"], item["snippet"]) for item in data["items"]]
-        else:
-            return []
-    return search

inspect_ai/tool/_tools/_web_search/_tavily.py ADDED Viewed

@@ -0,0 +1,77 @@
+import os
+from typing import Awaitable, Callable
+import httpx
+from pydantic import BaseModel, Field
+from tenacity import (
+    retry,
+    retry_if_exception,
+    stop_after_attempt,
+    stop_after_delay,
+    wait_exponential_jitter,
+)
+from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
+from inspect_ai.util._concurrency import concurrency
+class TavilySearchResult(BaseModel):
+    title: str
+    url: str
+    content: str
+    score: float
+class TavilySearchResponse(BaseModel):
+    query: str
+    answer: str | None = Field(default=None)
+    images: list[object]
+    results: list[TavilySearchResult]
+    response_time: float
+def tavily_search_provider(
+    num_results: int, max_connections: int
+) -> Callable[[str], Awaitable[str | None]]:
+    tavily_api_key = os.environ.get("TAVILY_API_KEY", None)
+    if not tavily_api_key:
+        raise PrerequisiteError(
+            "TAVILY_API_KEY not set in the environment. Please ensure ths variable is defined to use Tavily with the web_search tool.\n\nLearn more about the Tavily web search provider at https://inspect.aisi.org.uk/tools.html#tavily-provider"
+        )
+    if num_results > 20:
+        raise PrerequisiteError(
+            "The Tavily search provider is limited to 20 results per query."
+        )
+    # Create the client within the provider
+    client = httpx.AsyncClient(timeout=30)
+    async def search(query: str) -> str | None:
+        search_url = "https://api.tavily.com/search"
+        headers = {
+            "Authorization": f"Bearer {tavily_api_key}",
+        }
+        body = {
+            "query": query,
+            "max_results": 10,  # num_results,
+            # "search_depth": "advanced",
+            "include_answer": "advanced",
+        }
+        # retry up to 5 times over a period of up to 1 minute
+        @retry(
+            wait=wait_exponential_jitter(),
+            stop=stop_after_attempt(5) | stop_after_delay(60),
+            retry=retry_if_exception(httpx_should_retry),
+            before_sleep=log_httpx_retry_attempt(search_url),
+        )
+        async def _search() -> httpx.Response:
+            response = await client.post(search_url, headers=headers, json=body)
+            response.raise_for_status()
+            return response
+        async with concurrency("tavily_web_search", max_connections):
+            return TavilySearchResponse.model_validate((await _search()).json()).answer
+    return search

inspect_ai/tool/_tools/_web_search/_web_search.py ADDED Viewed

@@ -0,0 +1,85 @@
+from typing import Literal
+from inspect_ai._util.deprecation import deprecation_warning
+from ..._tool import Tool, ToolResult, tool
+from ._google import google_search_provider, maybe_get_google_api_keys
+from ._tavily import tavily_search_provider
+@tool
+def web_search(
+    provider: Literal["tavily", "google"] | None = None,
+    num_results: int = 3,
+    max_provider_calls: int = 3,
+    max_connections: int = 10,
+    model: str | None = None,
+) -> Tool:
+    """Web search tool.
+    A tool that can be registered for use by models to search the web. Use
+    the `use_tools()` solver to make the tool available (e.g.
+    `use_tools(web_search(provider="tavily"))`))
+    A web search is conducted using the specified provider.
+    - When using Tavily, all logic for relevance and summarization is handled by
+    the Tavily API.
+    - When using Google, the results are parsed for relevance using the specified
+    model, and the top 'num_results' relevant pages are returned.
+    See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
+    Args:
+      provider: Search provider to use:
+        - "tavily": Uses Tavily's Research API.
+        - "google": Uses Google Custom Search.
+        Note: The `| None` type is only for backwards compatibility. Passing
+        `None` is deprecated.
+      num_results: The number of search result pages used to provide information
+        back to the model.
+      max_provider_calls: Maximum number of search calls to make to the search
+        provider.
+      max_connections: Maximum number of concurrent connections to API endpoint
+        of search provider.
+      model: Model used to parse web pages for relevance - used only by the
+        `google` provider.
+    Returns:
+       A tool that can be registered for use by models to search the web.
+    """
+    if provider is None:
+        if maybe_get_google_api_keys():
+            deprecation_warning(
+                "The `google` `web_search` provider was inferred based on the presence of environment variables. Please specify the provider explicitly to avoid this warning."
+            )
+            provider = "google"
+        else:
+            raise ValueError(
+                "Omitting `provider` is no longer supported. Please specify the `web_search` provider explicitly to avoid this error."
+            )
+    search_provider = (
+        google_search_provider(num_results, max_provider_calls, max_connections, model)
+        if provider == "google"
+        else tavily_search_provider(num_results, max_connections)
+    )
+    async def execute(query: str) -> ToolResult:
+        """
+        Use the web_search tool to perform keyword searches of the web.
+        Args:
+            query (str): Search query.
+        """
+        search_result = await search_provider(query)
+        return (
+            (
+                "Here are your web search results. Please read them carefully as they may be useful later!\n"
+                + search_result
+            )
+            if search_result
+            else ("I'm sorry, I couldn't find any relevant information on the web.")
+        )
+    return execute

inspect_ai/util/_sandbox/events.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import contextlib
 import shlex
 from datetime import datetime
-from typing import Iterator, Literal, Type, Union, overload
+from typing import Any, Iterator, Literal, Type, Union, overload
 from pydantic import JsonValue
 from pydantic_core import to_jsonable_python
@@ -134,7 +134,8 @@ class SandboxEnvironmentProxy(SandboxEnvironment):
     @override
     async def connection(self, *, user: str | None = None) -> SandboxConnection:
-        return await self._sandbox.connection(user=user)
+        params: dict[str, Any] = {"user": user} if user is not None else {}
+        return await self._sandbox.connection(**params)
     @override
     def as_type(self, sandbox_cls: Type[ST]) -> ST:

{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.94
+Version: 0.3.95
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -32,6 +32,8 @@ Requires-Dist: httpx
 Requires-Dist: ijson>=3.2.0
 Requires-Dist: jsonlines>=3.0.0
 Requires-Dist: jsonpatch>=1.32
+Requires-Dist: jsonpath-ng>=1.7.0
+Requires-Dist: jsonref>=1.1.0
 Requires-Dist: jsonschema>3.1.1
 Requires-Dist: mmh3>3.1.0
 Requires-Dist: nest_asyncio
@@ -59,6 +61,7 @@ Requires-Dist: google-genai; extra == "dev"
 Requires-Dist: griffe; extra == "dev"
 Requires-Dist: groq; extra == "dev"
 Requires-Dist: ipython; extra == "dev"
+Requires-Dist: jsonpath-ng; extra == "dev"
 Requires-Dist: markdown; extra == "dev"
 Requires-Dist: mcp; extra == "dev"
 Requires-Dist: mistralai; extra == "dev"
@@ -66,9 +69,11 @@ Requires-Dist: moto[server]; extra == "dev"
 Requires-Dist: mypy; extra == "dev"
 Requires-Dist: nbformat; extra == "dev"
 Requires-Dist: openai; extra == "dev"
+Requires-Dist: pandas>=2.0.0; extra == "dev"
 Requires-Dist: panflute; extra == "dev"
 Requires-Dist: pip; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: pyarrow>=10.0.1; extra == "dev"
 Requires-Dist: pylint; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-asyncio; extra == "dev"
@@ -78,6 +83,8 @@ Requires-Dist: pytest-xdist; extra == "dev"
 Requires-Dist: ruff==0.9.6; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: trio; extra == "dev"
+Requires-Dist: pandas-stubs; extra == "dev"
+Requires-Dist: pyarrow-stubs; extra == "dev"
 Requires-Dist: types-Markdown; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"

inspect-ai 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl

inspect-ai 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl