PyPI - inspect-ai - Versions diffs - 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl - Mend

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

inspect_ai/__init__.py +2 -0
inspect_ai/_cli/log.py +1 -1
inspect_ai/_display/core/config.py +11 -5
inspect_ai/_display/core/panel.py +66 -2
inspect_ai/_display/core/textual.py +5 -2
inspect_ai/_display/plain/display.py +1 -0
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/widgets/transcript.py +41 -1
inspect_ai/_eval/run.py +12 -4
inspect_ai/_eval/score.py +2 -4
inspect_ai/_eval/task/log.py +1 -1
inspect_ai/_eval/task/run.py +59 -81
inspect_ai/_eval/task/task.py +1 -1
inspect_ai/_util/_async.py +1 -1
inspect_ai/_util/content.py +11 -6
inspect_ai/_util/interrupt.py +2 -2
inspect_ai/_util/text.py +7 -0
inspect_ai/_util/working.py +8 -37
inspect_ai/_view/__init__.py +0 -0
inspect_ai/_view/schema.py +3 -1
inspect_ai/_view/view.py +14 -0
inspect_ai/_view/www/CLAUDE.md +15 -0
inspect_ai/_view/www/dist/assets/index.css +273 -169
inspect_ai/_view/www/dist/assets/index.js +20079 -17019
inspect_ai/_view/www/log-schema.json +122 -8
inspect_ai/_view/www/package.json +5 -1
inspect_ai/_view/www/src/@types/log.d.ts +20 -2
inspect_ai/_view/www/src/app/App.tsx +1 -15
inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
inspect_ai/_view/www/src/app/routing/url.ts +84 -4
inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
inspect_ai/_view/www/src/app/types.ts +5 -1
inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
inspect_ai/_view/www/src/state/hooks.ts +52 -2
inspect_ai/_view/www/src/state/logSlice.ts +4 -3
inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
inspect_ai/_view/www/src/state/scrolling.ts +152 -0
inspect_ai/_view/www/src/utils/attachments.ts +7 -0
inspect_ai/_view/www/src/utils/python.ts +18 -0
inspect_ai/_view/www/yarn.lock +269 -6
inspect_ai/agent/_react.py +12 -7
inspect_ai/agent/_run.py +46 -11
inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
inspect_ai/log/_bundle.py +5 -3
inspect_ai/log/_log.py +3 -3
inspect_ai/log/_recorders/file.py +2 -9
inspect_ai/log/_transcript.py +1 -1
inspect_ai/model/_call_tools.py +6 -2
inspect_ai/model/_openai.py +1 -1
inspect_ai/model/_openai_responses.py +78 -39
inspect_ai/model/_openai_web_search.py +31 -0
inspect_ai/model/_providers/anthropic.py +3 -6
inspect_ai/model/_providers/azureai.py +72 -3
inspect_ai/model/_providers/openai.py +2 -1
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/scorer/_metric.py +1 -2
inspect_ai/solver/_task_state.py +2 -2
inspect_ai/tool/_tool.py +6 -2
inspect_ai/tool/_tool_def.py +27 -4
inspect_ai/tool/_tool_info.py +2 -0
inspect_ai/tool/_tools/_web_search/_google.py +15 -4
inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
inspect_ai/util/__init__.py +6 -0
inspect_ai/util/_json.py +3 -0
inspect_ai/util/_limit.py +374 -141
inspect_ai/util/_sandbox/docker/compose.py +20 -11
inspect_ai/util/_span.py +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_web_search/_tavily.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from typing import Awaitable, Callable
+from typing import Awaitable, Callable, Literal
 import httpx
 from pydantic import BaseModel, Field
@@ -16,6 +16,25 @@ from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
 from inspect_ai.util._concurrency import concurrency
+class TavilyOptions(BaseModel):
+    topic: Literal["general", "news"] | None = None
+    search_depth: Literal["basic", "advanced"] | None = None
+    chunks_per_source: Literal[1, 2, 3] | None = None
+    max_results: int | None = None
+    time_range: Literal["day", "week", "month", "year", "d", "w", "m", "y"] | None = (
+        None
+    )
+    days: int | None = None
+    include_answer: bool | Literal["basic", "advanced"] | None = None
+    include_raw_content: bool | None = None
+    include_images: bool | None = None
+    include_image_descriptions: bool | None = None
+    include_domains: list[str] | None = None
+    exclude_domains: list[str] | None = None
+    # max_connections is not a Tavily API option, but an inspect option
+    max_connections: int | None = None
 class TavilySearchResult(BaseModel):
     title: str
     url: str
@@ -32,17 +51,25 @@ class TavilySearchResponse(BaseModel):
 def tavily_search_provider(
-    num_results: int, max_connections: int
+    in_options: dict[str, object] | None = None,
 ) -> Callable[[str], Awaitable[str | None]]:
+    options = TavilyOptions.model_validate(in_options) if in_options else None
+    # Separate max_connections (which is an inspect thing) from the rest of the
+    # options which will be passed in the request body
+    max_connections = (options.max_connections if options else None) or 10
+    api_options = (
+        options.model_dump(exclude={"max_connections"}, exclude_none=True)
+        if options
+        else {}
+    )
+    if not api_options.get("include_answer", False):
+        api_options["include_answer"] = True
     tavily_api_key = os.environ.get("TAVILY_API_KEY", None)
     if not tavily_api_key:
         raise PrerequisiteError(
             "TAVILY_API_KEY not set in the environment. Please ensure ths variable is defined to use Tavily with the web_search tool.\n\nLearn more about the Tavily web search provider at https://inspect.aisi.org.uk/tools.html#tavily-provider"
         )
-    if num_results > 20:
-        raise PrerequisiteError(
-            "The Tavily search provider is limited to 20 results per query."
-        )
     # Create the client within the provider
     client = httpx.AsyncClient(timeout=30)
@@ -52,12 +79,8 @@ def tavily_search_provider(
         headers = {
             "Authorization": f"Bearer {tavily_api_key}",
         }
-        body = {
-            "query": query,
-            "max_results": 10,  # num_results,
-            # "search_depth": "advanced",
-            "include_answer": "advanced",
-        }
+        body = {"query": query, **api_options}
         # retry up to 5 times over a period of up to 1 minute
         @retry(

inspect_ai/tool/_tools/_web_search/_web_search.py CHANGED Viewed

@@ -1,68 +1,123 @@
-from typing import Literal
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Literal,
+    TypeAlias,
+    TypedDict,
+    get_args,
+)
+from typing_extensions import Unpack
 from inspect_ai._util.deprecation import deprecation_warning
+from inspect_ai.tool._tool_def import ToolDef
 from ..._tool import Tool, ToolResult, tool
-from ._google import google_search_provider, maybe_get_google_api_keys
-from ._tavily import tavily_search_provider
+from ._google import GoogleOptions, google_search_provider
+from ._tavily import TavilyOptions, tavily_search_provider
+Provider: TypeAlias = Literal["openai", "tavily", "google"]  # , "gemini", "anthropic"
+valid_providers = set(get_args(Provider))
+# It would have been nice if the values below were TypedDicts. The problem is
+# that if the caller creates a literal dict variable (rather than passing the
+# dict inline), the type checker will erase the type of the literal to something
+# that doesn't conform to the required TypedDict when passed. This is lame, but
+# we'll do runtime validation instead.
+#
+# If the caller uses this dict form and uses a value of `None`, it means that
+# they want to use that provider and to use the default options.
+class Providers(TypedDict, total=False):
+    google: dict[str, Any] | None
+    tavily: dict[str, Any] | None
+    openai: dict[str, Any] | None
+class WebSearchDeprecatedArgs(TypedDict, total=False):
+    provider: Literal["tavily", "google"] | None
+    num_results: int | None
+    max_provider_calls: int | None
+    max_connections: int | None
+    model: str | None
 @tool
 def web_search(
-    provider: Literal["tavily", "google"] | None = None,
-    num_results: int = 3,
-    max_provider_calls: int = 3,
-    max_connections: int = 10,
-    model: str | None = None,
+    providers: Provider | Providers | list[Provider | Providers] | None = None,
+    **deprecated: Unpack[WebSearchDeprecatedArgs],
 ) -> Tool:
     """Web search tool.
-    A tool that can be registered for use by models to search the web. Use
-    the `use_tools()` solver to make the tool available (e.g.
-    `use_tools(web_search(provider="tavily"))`))
+    Web searches are executed using a provider. Providers are split
+    into two categories:
-    A web search is conducted using the specified provider.
-    - When using Tavily, all logic for relevance and summarization is handled by
-    the Tavily API.
-    - When using Google, the results are parsed for relevance using the specified
-    model, and the top 'num_results' relevant pages are returned.
+    - Internal providers: "openai" - these use the model's built-in search
+      capability and do not require separate API keys. These work only for
+      their respective model provider (e.g. the "openai" search provider
+      works only for `openai/*` models).
+    - External providers: "tavily" and "google". These are external services
+      that work with any model and require separate accounts and API keys.
+    Internal providers will be prioritized if running on the corresponding model
+    (e.g., "openai" provider will be used when running on `openai` models). If an
+    internal provider is specified but the evaluation is run with a different
+    model, a fallback external provider must also be specified.
     See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
     Args:
-      provider: Search provider to use:
-        - "tavily": Uses Tavily's Research API.
-        - "google": Uses Google Custom Search.
-        Note: The `| None` type is only for backwards compatibility. Passing
-        `None` is deprecated.
-      num_results: The number of search result pages used to provide information
-        back to the model.
-      max_provider_calls: Maximum number of search calls to make to the search
-        provider.
-      max_connections: Maximum number of concurrent connections to API endpoint
-        of search provider.
-      model: Model used to parse web pages for relevance - used only by the
-        `google` provider.
+      providers: Configuration for the search providers to use. Currently supported
+        providers are "openai", "tavily", and "google". The `providers` parameter
+        supports several formats based on either a `str` specifying a provider or
+        a `dict` whose keys are the provider names and whose values are the
+        provider-specific options. A single value or a list of these can be passed.
+        This arg is optional just for backwards compatibility. New code should
+        always provide this argument.
+        Single provider:
+        ```
+        web_search("tavily")
+        web_search({"tavily": {"max_results": 5}})  # Tavily-specific options
+        ```
+        Multiple providers:
+        ```
+        # "openai" used for OpenAI models, "tavily" as fallback
+        web_search(["openai", "tavily"])
+        # The None value means to use the provider with default options
+        web_search({"openai": None, "tavily": {"max_results": 5}})
+        ```
+        Mixed format:
+        ```
+        web_search(["openai", {"tavily": {"max_results": 5}}])
+        ```
+        When specified in the `dict` format, the `None` value for a provider means
+        to use the provider with default options.
+        Provider-specific options:
+        - openai: Supports OpenAI's web search parameters.
+          See https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses
+        - tavily: Supports options like `max_results`, `search_depth`, etc.
+          See https://docs.tavily.com/documentation/api-reference/endpoint/search
+        - google: Supports options like `num_results`, `max_provider_calls`,
+          `max_connections`, and `model`
+      **deprecated: Deprecated arguments.
     Returns:
        A tool that can be registered for use by models to search the web.
     """
-    if provider is None:
-        if maybe_get_google_api_keys():
-            deprecation_warning(
-                "The `google` `web_search` provider was inferred based on the presence of environment variables. Please specify the provider explicitly to avoid this warning."
-            )
-            provider = "google"
-        else:
-            raise ValueError(
-                "Omitting `provider` is no longer supported. Please specify the `web_search` provider explicitly to avoid this error."
-            )
+    normalized_providers = _normalize_config(providers, **deprecated)
-    search_provider = (
-        google_search_provider(num_results, max_provider_calls, max_connections, model)
-        if provider == "google"
-        else tavily_search_provider(num_results, max_connections)
-    )
+    search_provider: Callable[[str], Awaitable[str | None]] | None = None
     async def execute(query: str) -> ToolResult:
         """
@@ -71,6 +126,9 @@ def web_search(
         Args:
             query (str): Search query.
         """
+        nonlocal search_provider
+        if not search_provider:
+            search_provider = _create_external_provider(normalized_providers)
         search_result = await search_provider(query)
         return (
@@ -82,4 +140,115 @@ def web_search(
             else ("I'm sorry, I couldn't find any relevant information on the web.")
         )
-    return execute
+    return ToolDef(
+        execute, name="web_search", options=dict(normalized_providers)
+    ).as_tool()
+def _normalize_config(
+    providers: Provider | Providers | list[Provider | Providers] | None,
+    **deprecated: Unpack[WebSearchDeprecatedArgs],
+) -> Providers:
+    """
+    Deal with breaking changes in the web_search parameter list.
+    This function adapts (hopefully) all of the old variants of how the tool
+    factory may have been called and converts them to the new config format.
+    """
+    # Cases to handle:
+    # 1. Both deprecated_provider and providers are set
+    #     ValueError
+    # 2. Neither deprecated_provider nor providers is set
+    #     act as if they passed provider="google"
+    # 3. Only providers is set
+    #     if any of the other deprecated parameters is set, then ValueError
+    #     else Happy path
+    # 4. Only deprecated_provider is set
+    #     convert to new config format - including processing old other params
+    deprecated_provider = deprecated.get("provider", None)
+    # Case 1.
+    if deprecated_provider and providers:
+        raise ValueError("`provider` is deprecated. Please only specify `providers`.")
+    # Case 2.
+    if providers is None and deprecated_provider is None:
+        deprecated_provider = "google"
+    num_results = deprecated.get("num_results", None)
+    max_provider_calls = deprecated.get("max_provider_calls", None)
+    max_connections = deprecated.get("max_connections", None)
+    model = deprecated.get("model", None)
+    # Getting here means that we have either a providers or a deprecated_provider
+    if deprecated_provider:
+        return _get_config_via_back_compat(
+            deprecated_provider,
+            num_results=num_results,
+            max_provider_calls=max_provider_calls,
+            max_connections=max_connections,
+            model=model,
+        )
+    assert providers, "providers should not be None here"
+    normalized: Providers = {}
+    for entry in providers if isinstance(providers, list) else [providers]:
+        if isinstance(entry, str):
+            if entry not in valid_providers:
+                raise ValueError(f"Invalid provider: '{entry}'")
+            normalized[entry] = None  # type: ignore
+        else:
+            for key, value in entry.items():
+                if key not in valid_providers:
+                    raise ValueError(f"Invalid provider: '{key}'")
+                normalized[key] = value  # type: ignore
+    return normalized
+def _get_config_via_back_compat(
+    provider: Literal["tavily", "google"],
+    num_results: int | None,
+    max_provider_calls: int | None,
+    max_connections: int | None,
+    model: str | None,
+) -> Providers:
+    if (
+        num_results is None
+        and max_provider_calls is None
+        and max_connections is None
+        and model is None
+    ):
+        return {"google": None} if provider == "google" else {"tavily": None}
+    # If we get here, we have at least one old school parameter
+    deprecation_warning(
+        "The `num_results`, `max_provider_calls`, `max_connections`, and `model` parameters are deprecated. Please use the `config` parameter instead."
+    )
+    if provider == "google":
+        return {
+            "google": GoogleOptions(
+                num_results=num_results,
+                max_provider_calls=max_provider_calls,
+                max_connections=max_connections,
+                model=model,
+            ).model_dump(exclude_none=True)
+        }
+    else:
+        return {
+            "tavily": TavilyOptions(
+                max_results=num_results, max_connections=max_connections
+            ).model_dump(exclude_none=True)
+        }
+def _create_external_provider(
+    providers: Providers,
+) -> Callable[[str], Awaitable[str | None]]:
+    if "tavily" in providers:
+        return tavily_search_provider(providers.get("tavily", None))
+    if "google" in providers:
+        return google_search_provider(providers.get("google", None))
+    raise ValueError("No valid provider found.")

inspect_ai/util/__init__.py CHANGED Viewed

@@ -3,9 +3,12 @@ from inspect_ai._util.trace import trace_action, trace_message
 from inspect_ai.util._limit import (
     Limit,
     LimitExceededError,
+    LimitScope,
     apply_limits,
     message_limit,
+    time_limit,
     token_limit,
+    working_limit,
 )
 from ._collect import collect
@@ -58,6 +61,7 @@ __all__ = [
     "resource",
     "subprocess",
     "LimitExceededError",
+    "LimitScope",
     "SandboxEnvironment",
     "SandboxEnvironmentConfigType",
     "SandboxEnvironmentLimits",
@@ -79,6 +83,8 @@ __all__ = [
     "subtask",
     "throttle",
     "token_limit",
+    "time_limit",
+    "working_limit",
     "trace_action",
     "trace_message",
     "RegistryType",

inspect_ai/util/_json.py CHANGED Viewed

@@ -3,6 +3,7 @@ import typing
 from copy import deepcopy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
+from enum import EnumMeta
 from typing import (
     Any,
     Dict,
@@ -101,6 +102,8 @@ def json_schema(t: Type[Any]) -> JSONSchema:
             or (isinstance(t, type) and issubclass(t, BaseModel))
         ):
             return cls_json_schema(t)
+        elif isinstance(t, EnumMeta):
+            return JSONSchema(enum=[item.value for item in t])
         elif t is type(None):
             return JSONSchema(type="null")
         else:

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl