PyPI - inspect-ai - Versions diffs - 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl - Mend

inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

inspect_ai/_cli/common.py +2 -1
inspect_ai/_cli/eval.py +2 -2
inspect_ai/_display/core/active.py +3 -0
inspect_ai/_display/core/config.py +1 -0
inspect_ai/_display/core/panel.py +21 -13
inspect_ai/_display/core/results.py +3 -7
inspect_ai/_display/core/rich.py +3 -5
inspect_ai/_display/log/__init__.py +0 -0
inspect_ai/_display/log/display.py +173 -0
inspect_ai/_display/plain/display.py +2 -2
inspect_ai/_display/rich/display.py +2 -4
inspect_ai/_display/textual/app.py +1 -6
inspect_ai/_display/textual/widgets/task_detail.py +3 -14
inspect_ai/_display/textual/widgets/tasks.py +1 -1
inspect_ai/_eval/eval.py +1 -1
inspect_ai/_eval/evalset.py +2 -2
inspect_ai/_eval/registry.py +6 -1
inspect_ai/_eval/run.py +5 -1
inspect_ai/_eval/task/constants.py +1 -0
inspect_ai/_eval/task/log.py +2 -0
inspect_ai/_eval/task/run.py +1 -1
inspect_ai/_util/citation.py +88 -0
inspect_ai/_util/content.py +24 -2
inspect_ai/_util/json.py +17 -2
inspect_ai/_util/registry.py +19 -4
inspect_ai/_view/schema.py +0 -6
inspect_ai/_view/www/dist/assets/index.css +82 -24
inspect_ai/_view/www/dist/assets/index.js +10124 -9808
inspect_ai/_view/www/log-schema.json +418 -1
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
inspect_ai/_view/www/package.json +2 -2
inspect_ai/_view/www/src/@types/log.d.ts +140 -39
inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
inspect_ai/_view/www/src/tests/README.md +2 -2
inspect_ai/_view/www/src/utils/git.ts +3 -1
inspect_ai/_view/www/src/utils/html.ts +6 -0
inspect_ai/agent/_handoff.py +3 -3
inspect_ai/log/_condense.py +5 -0
inspect_ai/log/_file.py +4 -1
inspect_ai/log/_log.py +9 -4
inspect_ai/log/_recorders/json.py +4 -2
inspect_ai/log/_util.py +2 -0
inspect_ai/model/__init__.py +14 -0
inspect_ai/model/_call_tools.py +13 -4
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_openai_responses.py +80 -34
inspect_ai/model/_providers/_anthropic_citations.py +158 -0
inspect_ai/model/_providers/_google_citations.py +100 -0
inspect_ai/model/_providers/anthropic.py +196 -34
inspect_ai/model/_providers/google.py +94 -22
inspect_ai/model/_providers/mistral.py +20 -7
inspect_ai/model/_providers/openai.py +11 -10
inspect_ai/model/_providers/openai_compatible.py +3 -2
inspect_ai/model/_providers/openai_responses.py +2 -5
inspect_ai/model/_providers/perplexity.py +123 -0
inspect_ai/model/_providers/providers.py +13 -2
inspect_ai/model/_providers/vertex.py +3 -0
inspect_ai/model/_trim.py +5 -0
inspect_ai/tool/__init__.py +14 -0
inspect_ai/tool/_mcp/_mcp.py +5 -2
inspect_ai/tool/_mcp/sampling.py +19 -3
inspect_ai/tool/_mcp/server.py +1 -1
inspect_ai/tool/_tool.py +10 -1
inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
inspect_ai/tool/_tools/_web_search/_google.py +22 -25
inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
inspect_ai/util/_display.py +11 -2
inspect_ai/util/_sandbox/docker/compose.py +2 -2
inspect_ai/util/_span.py +12 -1
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
/inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
/inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_web_search/_google.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-from typing import Awaitable, Callable
 import anyio
 import httpx
@@ -13,10 +12,14 @@ from tenacity import (
     wait_exponential_jitter,
 )
+from inspect_ai._util.citation import UrlCitation
+from inspect_ai._util.content import ContentText
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
 from inspect_ai.util._concurrency import concurrency
+from ._web_search_provider import SearchProvider
 DEFAULT_RELEVANCE_PROMPT = """I am trying to answer the following question and need to find the most relevant information on the web. Please let me know if the following content is relevant to the question or not. You should just respond with "yes" or "no".
 Question: {question}
@@ -52,7 +55,7 @@ def maybe_get_google_api_keys() -> tuple[str, str] | None:
 def google_search_provider(
     in_options: dict[str, object] | None = None,
-) -> Callable[[str], Awaitable[str | None]]:
+) -> SearchProvider:
     options = GoogleOptions.model_validate(in_options) if in_options else None
     num_results = (options.num_results if options else None) or 3
     max_provider_calls = (options.max_provider_calls if options else None) or 3
@@ -69,14 +72,13 @@ def google_search_provider(
     # Create the client within the provider
     client = httpx.AsyncClient()
-    async def search(query: str) -> str | None:
+    async def search(query: str) -> list[ContentText] | None:
         # limit number of concurrent searches
-        page_contents: list[str] = []
-        processed_links: list[SearchLink] = []
+        results: list[ContentText] = []
         search_calls = 0
         # Paginate through search results until we have successfully extracted num_results pages or we have reached max_provider_calls
-        while len(page_contents) < num_results and search_calls < max_provider_calls:
+        while len(results) < num_results and search_calls < max_provider_calls:
             async with concurrency("google_web_search", max_connections):
                 links = await _search(query, start_idx=search_calls * 10)
@@ -84,10 +86,10 @@ def google_search_provider(
                 async def process_link(link: SearchLink) -> None:
                     try:
-                        page = await page_if_relevant(link.url, query, model, client)
-                        if page:
-                            page_contents.append(page)
-                            processed_links.append(link)
+                        if page := await page_if_relevant(
+                            link.url, query, model, client
+                        ):
+                            results.append(page)
                     # exceptions fetching pages are very common!
                     except Exception:
                         pass
@@ -97,18 +99,7 @@ def google_search_provider(
             search_calls += 1
-        return (
-            "\n\n".join(
-                "[{title}]({url}):\n{page_content}".format(
-                    title=link.title, url=link.url, page_content=page_content
-                )
-                for link, page_content in zip(
-                    processed_links, page_contents, strict=True
-                )
-            )
-            if processed_links
-            else None
-        )
+        return results or None
     async def _search(query: str, start_idx: int) -> list[SearchLink]:
         # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
@@ -153,7 +144,7 @@ def google_search_provider(
 async def page_if_relevant(
     url: str, query: str, relevance_model: str | None, client: httpx.AsyncClient
-) -> str | None:
+) -> ContentText | None:
     """
     Use parser model to determine if a web page contents is relevant to a query.
@@ -181,13 +172,16 @@ async def page_if_relevant(
     # parse it
     encoding_scheme = response.encoding or "utf-8"
     soup = BeautifulSoup(response.content.decode(encoding_scheme), "html.parser")
+    page_title = soup.title.get_text(strip=True) if soup.title else None
     main_content = soup.find("main") or soup.find("body") or soup
     if not isinstance(main_content, NavigableString):
         paragraphs = main_content.find_all("p")
         full_text = ""
         for p in paragraphs:
-            full_text += p.get_text(strip=True, separator=" ")
+            full_text += ("\n" if full_text else "") + p.get_text(
+                strip=True, separator=" "
+            )
             if len(full_text.split()) > 2000:
                 break
     else:
@@ -202,6 +196,9 @@ async def page_if_relevant(
     ).message.text
     if "yes" in is_relevant.lower():
-        return full_text
+        return ContentText(
+            text=(f"{page_title}\n" if page_title else "") + full_text,
+            citations=[UrlCitation(url=url, title=page_title)],
+        )
     else:
         return None

inspect_ai/tool/_tools/_web_search/_tavily.py CHANGED Viewed

@@ -1,19 +1,12 @@
-import os
-from typing import Awaitable, Callable, Literal
+from typing import Any, Literal
-import httpx
 from pydantic import BaseModel, Field
-from tenacity import (
-    retry,
-    retry_if_exception,
-    stop_after_attempt,
-    stop_after_delay,
-    wait_exponential_jitter,
-)
-from inspect_ai._util.error import PrerequisiteError
-from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
-from inspect_ai.util._concurrency import concurrency
+from inspect_ai._util.citation import UrlCitation
+from inspect_ai._util.content import ContentText
+from ._base_http_provider import BaseHttpProvider
+from ._web_search_provider import SearchProvider
 class TavilyOptions(BaseModel):
@@ -50,61 +43,50 @@ class TavilySearchResponse(BaseModel):
     response_time: float
-def tavily_search_provider(
-    in_options: dict[str, object] | None = None,
-) -> Callable[[str], Awaitable[str | None]]:
-    options = TavilyOptions.model_validate(in_options) if in_options else None
-    # Separate max_connections (which is an inspect thing) from the rest of the
-    # options which will be passed in the request body
-    max_connections = (options.max_connections if options else None) or 10
-    api_options = (
-        options.model_dump(exclude={"max_connections"}, exclude_none=True)
-        if options
-        else {}
-    )
-    if not api_options.get("include_answer", False):
-        api_options["include_answer"] = True
+class TavilySearchProvider(BaseHttpProvider):
+    """Tavily-specific implementation of HttpSearchProvider."""
-    tavily_api_key = os.environ.get("TAVILY_API_KEY", None)
-    if not tavily_api_key:
-        raise PrerequisiteError(
-            "TAVILY_API_KEY not set in the environment. Please ensure ths variable is defined to use Tavily with the web_search tool.\n\nLearn more about the Tavily web search provider at https://inspect.aisi.org.uk/tools.html#tavily-provider"
+    def __init__(self, options: dict[str, Any] | None = None):
+        super().__init__(
+            env_key_name="TAVILY_API_KEY",
+            api_endpoint="https://api.tavily.com/search",
+            provider_name="Tavily",
+            concurrency_key="tavily_web_search",
+            options=options,
         )
-    # Create the client within the provider
-    client = httpx.AsyncClient(timeout=30)
-    async def search(query: str) -> str | None:
-        # See https://docs.tavily.com/documentation/api-reference/endpoint/search
-        search_url = "https://api.tavily.com/search"
-        headers = {
-            "Authorization": f"Bearer {tavily_api_key}",
+    def prepare_headers(self, api_key: str) -> dict[str, str]:
+        return {
+            "Authorization": f"Bearer {api_key}",
         }
-        body = {"query": query, **api_options}
-        # retry up to 5 times over a period of up to 1 minute
-        @retry(
-            wait=wait_exponential_jitter(),
-            stop=stop_after_attempt(5) | stop_after_delay(60),
-            retry=retry_if_exception(httpx_should_retry),
-            before_sleep=log_httpx_retry_attempt(search_url),
+    def set_default_options(self, options: dict[str, Any]) -> dict[str, Any]:
+        # Force inclusion of answer if not specified
+        new_options = options.copy()
+        new_options["include_answer"] = True
+        return new_options
+    def parse_response(self, response_data: dict[str, Any]) -> ContentText | None:
+        tavily_search_response = TavilySearchResponse.model_validate(response_data)
+        if not tavily_search_response.results and not tavily_search_response.answer:
+            return None
+        return ContentText(
+            text=tavily_search_response.answer or "No answer found.",
+            citations=[
+                UrlCitation(
+                    cited_text=result.content, title=result.title, url=result.url
+                )
+                for result in tavily_search_response.results
+            ],
         )
-        async def _search() -> httpx.Response:
-            response = await client.post(search_url, headers=headers, json=body)
-            response.raise_for_status()
-            return response
-        async with concurrency("tavily_web_search", max_connections):
-            tavily_search_response = TavilySearchResponse.model_validate(
-                (await _search()).json()
-            )
-            results_str = "\n\n".join(
-                [
-                    f"[{result.title}]({result.url}):\n{result.content}"
-                    for result in tavily_search_response.results
-                ]
-            )
-            return f"Answer: {tavily_search_response.answer}\n\n{results_str}"
-    return search
+def tavily_search_provider(
+    in_options: dict[str, object] | None = None,
+) -> SearchProvider:
+    options = TavilyOptions.model_validate(in_options) if in_options else None
+    return TavilySearchProvider(
+        options.model_dump(exclude_none=True) if options else None
+    ).search

inspect_ai/tool/_tools/_web_search/_web_search.py CHANGED Viewed

@@ -1,7 +1,5 @@
 from typing import (
     Any,
-    Awaitable,
-    Callable,
     Literal,
     TypeAlias,
     TypedDict,
@@ -14,10 +12,14 @@ from inspect_ai._util.deprecation import deprecation_warning
 from inspect_ai.tool._tool_def import ToolDef
 from ..._tool import Tool, ToolResult, tool
+from ._exa import ExaOptions, exa_search_provider
 from ._google import GoogleOptions, google_search_provider
 from ._tavily import TavilyOptions, tavily_search_provider
+from ._web_search_provider import SearchProvider
-Provider: TypeAlias = Literal["openai", "tavily", "google"]  # , "gemini", "anthropic"
+Provider: TypeAlias = Literal[
+    "gemini", "openai", "anthropic", "tavily", "google", "exa"
+]
 valid_providers = set(get_args(Provider))
@@ -30,9 +32,21 @@ valid_providers = set(get_args(Provider))
 # If the caller uses this dict form and uses a value of `None`, it means that
 # they want to use that provider and to use the default options.
 class Providers(TypedDict, total=False):
-    google: dict[str, Any] | None
-    tavily: dict[str, Any] | None
-    openai: dict[str, Any] | None
+    openai: dict[str, Any] | Literal[True]
+    anthropic: dict[str, Any] | Literal[True]
+    gemini: dict[str, Any] | Literal[True]
+    tavily: dict[str, Any] | Literal[True]
+    google: dict[str, Any] | Literal[True]
+    exa: dict[str, Any] | Literal[True]
+class _NormalizedProviders(TypedDict, total=False):
+    openai: dict[str, Any]
+    anthropic: dict[str, Any]
+    gemini: dict[str, Any]
+    tavily: dict[str, Any]
+    google: dict[str, Any]
+    exa: dict[str, Any]
 class WebSearchDeprecatedArgs(TypedDict, total=False):
@@ -53,13 +67,13 @@ def web_search(
     Web searches are executed using a provider. Providers are split
     into two categories:
-    - Internal providers: "openai" - these use the model's built-in search
-      capability and do not require separate API keys. These work only for
+    - Internal providers: "openai", "anthropic" - these use the model's built-in
+      search capability and do not require separate API keys. These work only for
       their respective model provider (e.g. the "openai" search provider
       works only for `openai/*` models).
-    - External providers: "tavily" and "google". These are external services
-      that work with any m odel and require separate accounts and API keys.
+    - External providers: "tavily", "google", and "exa". These are external services
+      that work with any model and require separate accounts and API keys.
     Internal providers will be prioritized if running on the corresponding model
     (e.g., "openai" provider will be used when running on `openai` models). If an
@@ -70,12 +84,12 @@ def web_search(
     Args:
       providers: Configuration for the search providers to use. Currently supported
-        providers are "openai","tavily", and "google", The `providers` parameter
-        supports several formats based on either a `str` specifying a provider or
-        a `dict` whose keys are the provider names and whose values are the
-        provider-specific options. A single value or a list of these can be passed.
-        This arg is optional just for backwards compatibility. New code should
-        always provide this argument.
+        providers are "openai", "anthropic", "tavily", "google", and "exa". The
+        `providers` parameter supports several formats based on either a `str`
+        specifying a provider or a `dict` whose keys are the provider names and
+        whose values are the provider-specific options. A single value or a list
+        of these can be passed. This arg is optional just for backwards compatibility.
+        New code should always provide this argument.
         Single provider:
         ```
@@ -88,8 +102,8 @@ def web_search(
         # "openai" used for OpenAI models, "tavily" as fallback
         web_search(["openai", "tavily"])
-        # The None value means to use the provider with default options
-        web_search({"openai": None, "tavily": {"max_results": 5}}
+        # The True value means to use the provider with default options
+        web_search({"openai": True, "tavily": {"max_results": 5}}
         ```
         Mixed format:
@@ -104,9 +118,15 @@ def web_search(
         - openai: Supports OpenAI's web search parameters.
           See https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses
+        - anthropic: Supports Anthropic's web search parameters.
+          See https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-search-tool#tool-definition
         - tavily: Supports options like `max_results`, `search_depth`, etc.
           See https://docs.tavily.com/documentation/api-reference/endpoint/search
+        - exa: Supports options like `text`, `model`, etc.
+          See https://docs.exa.ai/reference/answer
         - google: Supports options like `num_results`, `max_provider_calls`,
           `max_connections`, and `model`
@@ -117,7 +137,7 @@ def web_search(
     """
     normalized_providers = _normalize_config(providers, **deprecated)
-    search_provider: Callable[[str], Awaitable[str | None]] | None = None
+    search_provider: SearchProvider | None = None
     async def execute(query: str) -> ToolResult:
         """
@@ -131,13 +151,17 @@ def web_search(
             search_provider = _create_external_provider(normalized_providers)
         search_result = await search_provider(query)
+        # This is gunky here because ToolResult is typed with a List rather than
+        # a Sequence, and Lists are variant (rather than covariant). This means
+        # it's illegal to assign a List of a narrower type to a List of a broader
+        # type. By making a copy of the list and not capturing an alias to it,
+        # mypy knows it's safe.
         return (
-            (
-                "Here are your web search results. Please read them carefully as they may be useful later!\n"
-                + search_result
-            )
-            if search_result
-            else ("I'm sorry, I couldn't find any relevant information on the web.")
+            list(search_result)
+            if isinstance(search_result, list)
+            else search_result
+            if search_result is not None
+            else "I couldn't find any relevant information on the web."
         )
     return ToolDef(
@@ -148,7 +172,7 @@ def web_search(
 def _normalize_config(
     providers: Provider | Providers | list[Provider | Providers] | None,
     **deprecated: Unpack[WebSearchDeprecatedArgs],
-) -> Providers:
+) -> _NormalizedProviders:
     """
     Deal with breaking changes in the web_search parameter list.
@@ -191,34 +215,48 @@ def _normalize_config(
         )
     assert providers, "providers should not be None here"
-    normalized: Providers = {}
+    normalized: _NormalizedProviders = {}
     for entry in providers if isinstance(providers, list) else [providers]:
         if isinstance(entry, str):
             if entry not in valid_providers:
                 raise ValueError(f"Invalid provider: '{entry}'")
-            normalized[entry] = None  # type: ignore
+            normalized[entry] = {}  # type: ignore
         else:
             for key, value in entry.items():
                 if key not in valid_providers:
                     raise ValueError(f"Invalid provider: '{key}'")
-                normalized[key] = value  # type: ignore
+                if (
+                    not isinstance(value, dict)
+                    and value is not True
+                    and value is not None
+                ):
+                    raise ValueError(
+                        f"Invalid value for provider '{key}': {value}. Expected a dict, None, or True."
+                    )
+                normalized[key] = value if isinstance(value, dict) else {}  # type: ignore
     return normalized
 def _get_config_via_back_compat(
-    provider: Literal["tavily", "google"],
+    provider: Literal["tavily", "google", "exa"],
     num_results: int | None,
     max_provider_calls: int | None,
     max_connections: int | None,
     model: str | None,
-) -> Providers:
+) -> _NormalizedProviders:
     if (
         num_results is None
         and max_provider_calls is None
         and max_connections is None
         and model is None
     ):
-        return {"google": None} if provider == "google" else {"tavily": None}
+        if provider == "google":
+            return {"google": {}}
+        elif provider == "exa":
+            return {"exa": {}}
+        else:
+            return {"tavily": {}}
     # If we get here, we have at least one old school parameter
     deprecation_warning(
@@ -234,6 +272,12 @@ def _get_config_via_back_compat(
                 model=model,
             ).model_dump(exclude_none=True)
         }
+    elif provider == "exa":
+        return {
+            "exa": ExaOptions(max_connections=max_connections).model_dump(
+                exclude_none=True
+            )
+        }
     else:
         return {
             "tavily": TavilyOptions(
@@ -243,12 +287,15 @@ def _get_config_via_back_compat(
 def _create_external_provider(
-    providers: Providers,
-) -> Callable[[str], Awaitable[str | None]]:
+    providers: _NormalizedProviders,
+) -> SearchProvider:
     if "tavily" in providers:
-        return tavily_search_provider(providers.get("tavily", None))
+        return tavily_search_provider(providers.get("tavily"))
+    if "exa" in providers:
+        return exa_search_provider(providers.get("exa"))
     if "google" in providers:
-        return google_search_provider(providers.get("google", None))
+        return google_search_provider(providers.get("google"))
     raise ValueError("No valid provider found.")

inspect_ai/tool/_tools/_web_search/_web_search_provider.py ADDED Viewed

@@ -0,0 +1,7 @@
+from typing import Awaitable, Callable, TypeAlias
+from inspect_ai._util.content import ContentText
+SearchProvider: TypeAlias = Callable[
+    [str], Awaitable[str | ContentText | list[ContentText] | None]
+]

inspect_ai/util/_display.py CHANGED Viewed

@@ -8,7 +8,7 @@ from inspect_ai._util.thread import is_main_thread
 logger = getLogger(__name__)
-DisplayType = Literal["full", "conversation", "rich", "plain", "none"]
+DisplayType = Literal["full", "conversation", "rich", "plain", "log", "none"]
 """Console display type."""
@@ -34,7 +34,7 @@ def init_display_type(display: str | None = None) -> DisplayType:
         display = "plain"
     match display:
-        case "full" | "conversation" | "rich" | "plain" | "none":
+        case "full" | "conversation" | "rich" | "plain" | "log" | "none":
             _display_type = display
         case _:
             logger.warning(
@@ -57,6 +57,15 @@ def display_type() -> DisplayType:
         return init_display_type()
+def display_type_plain() -> bool:
+    """Does the current display type prefer plain text?
+    Returns:
+       bool: True if the display type is "plain" or "log".
+    """
+    return display_type() in ["plain", "log"]
 def display_type_initialized() -> bool:
     global _display_type
     return _display_type is not None

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pydantic import BaseModel
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.trace import trace_message
 from inspect_ai.util._concurrency import concurrency
-from inspect_ai.util._display import display_type
+from inspect_ai.util._display import display_type, display_type_plain
 from inspect_ai.util._subprocess import ExecResult, subprocess
 from .prereqs import (
@@ -285,7 +285,7 @@ async def compose_command(
     env = project.env if (project.env and forward_env) else {}
     # ansi (apply global override)
-    if display_type() == "plain":
+    if display_type_plain():
         ansi = "never"
     if ansi:
         compose_command = compose_command + ["--ansi", ansi]

inspect_ai/util/_span.py CHANGED Viewed

@@ -1,8 +1,12 @@
 import contextlib
+import inspect
 from contextvars import ContextVar
+from logging import getLogger
 from typing import AsyncIterator
 from uuid import uuid4
+logger = getLogger(__name__)
 @contextlib.asynccontextmanager
 async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
@@ -22,6 +26,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
     # span id
     id = uuid4().hex
+    # span caller context
+    frame = inspect.stack()[1]
+    caller = f"{frame.function}() [{frame.filename}:{frame.lineno}]"
     # capture parent id
     parent_id = _current_span_id.get()
@@ -48,7 +56,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
         # send end event
         transcript()._event(SpanEndEvent(id=id))
-        _current_span_id.reset(token)
+        try:
+            _current_span_id.reset(token)
+        except ValueError:
+            logger.warning(f"Exiting span created in another context: {caller}")
 def current_span_id() -> str | None:

{inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.103
+Version: 0.3.104
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
 Requires-Dist: ipython; extra == "dev"
 Requires-Dist: jsonpath-ng; extra == "dev"
 Requires-Dist: markdown; extra == "dev"
-Requires-Dist: mcp; extra == "dev"
+Requires-Dist: mcp>=1.9.4; extra == "dev"
 Requires-Dist: mistralai; extra == "dev"
 Requires-Dist: moto[server]; extra == "dev"
 Requires-Dist: mypy>=1.16.0; extra == "dev"

inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl