inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +2 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/json.py +4 -2
  72. inspect_ai/log/_util.py +2 -0
  73. inspect_ai/model/__init__.py +14 -0
  74. inspect_ai/model/_call_tools.py +13 -4
  75. inspect_ai/model/_chat_message.py +3 -0
  76. inspect_ai/model/_openai_responses.py +80 -34
  77. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  78. inspect_ai/model/_providers/_google_citations.py +100 -0
  79. inspect_ai/model/_providers/anthropic.py +196 -34
  80. inspect_ai/model/_providers/google.py +94 -22
  81. inspect_ai/model/_providers/mistral.py +20 -7
  82. inspect_ai/model/_providers/openai.py +11 -10
  83. inspect_ai/model/_providers/openai_compatible.py +3 -2
  84. inspect_ai/model/_providers/openai_responses.py +2 -5
  85. inspect_ai/model/_providers/perplexity.py +123 -0
  86. inspect_ai/model/_providers/providers.py +13 -2
  87. inspect_ai/model/_providers/vertex.py +3 -0
  88. inspect_ai/model/_trim.py +5 -0
  89. inspect_ai/tool/__init__.py +14 -0
  90. inspect_ai/tool/_mcp/_mcp.py +5 -2
  91. inspect_ai/tool/_mcp/sampling.py +19 -3
  92. inspect_ai/tool/_mcp/server.py +1 -1
  93. inspect_ai/tool/_tool.py +10 -1
  94. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  95. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  96. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  97. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  98. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  99. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  100. inspect_ai/util/_display.py +11 -2
  101. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  102. inspect_ai/util/_span.py +12 -1
  103. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  104. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
  105. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  106. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  107. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  108. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  109. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  110. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,4 @@
1
1
  import os
2
- from typing import Awaitable, Callable
3
2
 
4
3
  import anyio
5
4
  import httpx
@@ -13,10 +12,14 @@ from tenacity import (
13
12
  wait_exponential_jitter,
14
13
  )
15
14
 
15
+ from inspect_ai._util.citation import UrlCitation
16
+ from inspect_ai._util.content import ContentText
16
17
  from inspect_ai._util.error import PrerequisiteError
17
18
  from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
18
19
  from inspect_ai.util._concurrency import concurrency
19
20
 
21
+ from ._web_search_provider import SearchProvider
22
+
20
23
  DEFAULT_RELEVANCE_PROMPT = """I am trying to answer the following question and need to find the most relevant information on the web. Please let me know if the following content is relevant to the question or not. You should just respond with "yes" or "no".
21
24
 
22
25
  Question: {question}
@@ -52,7 +55,7 @@ def maybe_get_google_api_keys() -> tuple[str, str] | None:
52
55
 
53
56
  def google_search_provider(
54
57
  in_options: dict[str, object] | None = None,
55
- ) -> Callable[[str], Awaitable[str | None]]:
58
+ ) -> SearchProvider:
56
59
  options = GoogleOptions.model_validate(in_options) if in_options else None
57
60
  num_results = (options.num_results if options else None) or 3
58
61
  max_provider_calls = (options.max_provider_calls if options else None) or 3
@@ -69,14 +72,13 @@ def google_search_provider(
69
72
  # Create the client within the provider
70
73
  client = httpx.AsyncClient()
71
74
 
72
- async def search(query: str) -> str | None:
75
+ async def search(query: str) -> list[ContentText] | None:
73
76
  # limit number of concurrent searches
74
- page_contents: list[str] = []
75
- processed_links: list[SearchLink] = []
77
+ results: list[ContentText] = []
76
78
  search_calls = 0
77
79
 
78
80
  # Paginate through search results until we have successfully extracted num_results pages or we have reached max_provider_calls
79
- while len(page_contents) < num_results and search_calls < max_provider_calls:
81
+ while len(results) < num_results and search_calls < max_provider_calls:
80
82
  async with concurrency("google_web_search", max_connections):
81
83
  links = await _search(query, start_idx=search_calls * 10)
82
84
 
@@ -84,10 +86,10 @@ def google_search_provider(
84
86
 
85
87
  async def process_link(link: SearchLink) -> None:
86
88
  try:
87
- page = await page_if_relevant(link.url, query, model, client)
88
- if page:
89
- page_contents.append(page)
90
- processed_links.append(link)
89
+ if page := await page_if_relevant(
90
+ link.url, query, model, client
91
+ ):
92
+ results.append(page)
91
93
  # exceptions fetching pages are very common!
92
94
  except Exception:
93
95
  pass
@@ -97,18 +99,7 @@ def google_search_provider(
97
99
 
98
100
  search_calls += 1
99
101
 
100
- return (
101
- "\n\n".join(
102
- "[{title}]({url}):\n{page_content}".format(
103
- title=link.title, url=link.url, page_content=page_content
104
- )
105
- for link, page_content in zip(
106
- processed_links, page_contents, strict=True
107
- )
108
- )
109
- if processed_links
110
- else None
111
- )
102
+ return results or None
112
103
 
113
104
  async def _search(query: str, start_idx: int) -> list[SearchLink]:
114
105
  # List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
@@ -153,7 +144,7 @@ def google_search_provider(
153
144
 
154
145
  async def page_if_relevant(
155
146
  url: str, query: str, relevance_model: str | None, client: httpx.AsyncClient
156
- ) -> str | None:
147
+ ) -> ContentText | None:
157
148
  """
158
149
  Use parser model to determine if a web page contents is relevant to a query.
159
150
 
@@ -181,13 +172,16 @@ async def page_if_relevant(
181
172
  # parse it
182
173
  encoding_scheme = response.encoding or "utf-8"
183
174
  soup = BeautifulSoup(response.content.decode(encoding_scheme), "html.parser")
175
+ page_title = soup.title.get_text(strip=True) if soup.title else None
184
176
 
185
177
  main_content = soup.find("main") or soup.find("body") or soup
186
178
  if not isinstance(main_content, NavigableString):
187
179
  paragraphs = main_content.find_all("p")
188
180
  full_text = ""
189
181
  for p in paragraphs:
190
- full_text += p.get_text(strip=True, separator=" ")
182
+ full_text += ("\n" if full_text else "") + p.get_text(
183
+ strip=True, separator=" "
184
+ )
191
185
  if len(full_text.split()) > 2000:
192
186
  break
193
187
  else:
@@ -202,6 +196,9 @@ async def page_if_relevant(
202
196
  ).message.text
203
197
 
204
198
  if "yes" in is_relevant.lower():
205
- return full_text
199
+ return ContentText(
200
+ text=(f"{page_title}\n" if page_title else "") + full_text,
201
+ citations=[UrlCitation(url=url, title=page_title)],
202
+ )
206
203
  else:
207
204
  return None
@@ -1,19 +1,12 @@
1
- import os
2
- from typing import Awaitable, Callable, Literal
1
+ from typing import Any, Literal
3
2
 
4
- import httpx
5
3
  from pydantic import BaseModel, Field
6
- from tenacity import (
7
- retry,
8
- retry_if_exception,
9
- stop_after_attempt,
10
- stop_after_delay,
11
- wait_exponential_jitter,
12
- )
13
4
 
14
- from inspect_ai._util.error import PrerequisiteError
15
- from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
16
- from inspect_ai.util._concurrency import concurrency
5
+ from inspect_ai._util.citation import UrlCitation
6
+ from inspect_ai._util.content import ContentText
7
+
8
+ from ._base_http_provider import BaseHttpProvider
9
+ from ._web_search_provider import SearchProvider
17
10
 
18
11
 
19
12
  class TavilyOptions(BaseModel):
@@ -50,61 +43,50 @@ class TavilySearchResponse(BaseModel):
50
43
  response_time: float
51
44
 
52
45
 
53
- def tavily_search_provider(
54
- in_options: dict[str, object] | None = None,
55
- ) -> Callable[[str], Awaitable[str | None]]:
56
- options = TavilyOptions.model_validate(in_options) if in_options else None
57
- # Separate max_connections (which is an inspect thing) from the rest of the
58
- # options which will be passed in the request body
59
- max_connections = (options.max_connections if options else None) or 10
60
- api_options = (
61
- options.model_dump(exclude={"max_connections"}, exclude_none=True)
62
- if options
63
- else {}
64
- )
65
- if not api_options.get("include_answer", False):
66
- api_options["include_answer"] = True
46
+ class TavilySearchProvider(BaseHttpProvider):
47
+ """Tavily-specific implementation of HttpSearchProvider."""
67
48
 
68
- tavily_api_key = os.environ.get("TAVILY_API_KEY", None)
69
- if not tavily_api_key:
70
- raise PrerequisiteError(
71
- "TAVILY_API_KEY not set in the environment. Please ensure ths variable is defined to use Tavily with the web_search tool.\n\nLearn more about the Tavily web search provider at https://inspect.aisi.org.uk/tools.html#tavily-provider"
49
+ def __init__(self, options: dict[str, Any] | None = None):
50
+ super().__init__(
51
+ env_key_name="TAVILY_API_KEY",
52
+ api_endpoint="https://api.tavily.com/search",
53
+ provider_name="Tavily",
54
+ concurrency_key="tavily_web_search",
55
+ options=options,
72
56
  )
73
57
 
74
- # Create the client within the provider
75
- client = httpx.AsyncClient(timeout=30)
76
-
77
- async def search(query: str) -> str | None:
78
- # See https://docs.tavily.com/documentation/api-reference/endpoint/search
79
- search_url = "https://api.tavily.com/search"
80
- headers = {
81
- "Authorization": f"Bearer {tavily_api_key}",
58
+ def prepare_headers(self, api_key: str) -> dict[str, str]:
59
+ return {
60
+ "Authorization": f"Bearer {api_key}",
82
61
  }
83
62
 
84
- body = {"query": query, **api_options}
85
-
86
- # retry up to 5 times over a period of up to 1 minute
87
- @retry(
88
- wait=wait_exponential_jitter(),
89
- stop=stop_after_attempt(5) | stop_after_delay(60),
90
- retry=retry_if_exception(httpx_should_retry),
91
- before_sleep=log_httpx_retry_attempt(search_url),
63
+ def set_default_options(self, options: dict[str, Any]) -> dict[str, Any]:
64
+ # Force inclusion of answer if not specified
65
+ new_options = options.copy()
66
+ new_options["include_answer"] = True
67
+ return new_options
68
+
69
+ def parse_response(self, response_data: dict[str, Any]) -> ContentText | None:
70
+ tavily_search_response = TavilySearchResponse.model_validate(response_data)
71
+
72
+ if not tavily_search_response.results and not tavily_search_response.answer:
73
+ return None
74
+
75
+ return ContentText(
76
+ text=tavily_search_response.answer or "No answer found.",
77
+ citations=[
78
+ UrlCitation(
79
+ cited_text=result.content, title=result.title, url=result.url
80
+ )
81
+ for result in tavily_search_response.results
82
+ ],
92
83
  )
93
- async def _search() -> httpx.Response:
94
- response = await client.post(search_url, headers=headers, json=body)
95
- response.raise_for_status()
96
- return response
97
-
98
- async with concurrency("tavily_web_search", max_connections):
99
- tavily_search_response = TavilySearchResponse.model_validate(
100
- (await _search()).json()
101
- )
102
- results_str = "\n\n".join(
103
- [
104
- f"[{result.title}]({result.url}):\n{result.content}"
105
- for result in tavily_search_response.results
106
- ]
107
- )
108
- return f"Answer: {tavily_search_response.answer}\n\n{results_str}"
109
-
110
- return search
84
+
85
+
86
+ def tavily_search_provider(
87
+ in_options: dict[str, object] | None = None,
88
+ ) -> SearchProvider:
89
+ options = TavilyOptions.model_validate(in_options) if in_options else None
90
+ return TavilySearchProvider(
91
+ options.model_dump(exclude_none=True) if options else None
92
+ ).search
@@ -1,7 +1,5 @@
1
1
  from typing import (
2
2
  Any,
3
- Awaitable,
4
- Callable,
5
3
  Literal,
6
4
  TypeAlias,
7
5
  TypedDict,
@@ -14,10 +12,14 @@ from inspect_ai._util.deprecation import deprecation_warning
14
12
  from inspect_ai.tool._tool_def import ToolDef
15
13
 
16
14
  from ..._tool import Tool, ToolResult, tool
15
+ from ._exa import ExaOptions, exa_search_provider
17
16
  from ._google import GoogleOptions, google_search_provider
18
17
  from ._tavily import TavilyOptions, tavily_search_provider
18
+ from ._web_search_provider import SearchProvider
19
19
 
20
- Provider: TypeAlias = Literal["openai", "tavily", "google"] # , "gemini", "anthropic"
20
+ Provider: TypeAlias = Literal[
21
+ "gemini", "openai", "anthropic", "tavily", "google", "exa"
22
+ ]
21
23
  valid_providers = set(get_args(Provider))
22
24
 
23
25
 
@@ -30,9 +32,21 @@ valid_providers = set(get_args(Provider))
30
32
  # If the caller uses this dict form and uses a value of `None`, it means that
31
33
  # they want to use that provider and to use the default options.
32
34
  class Providers(TypedDict, total=False):
33
- google: dict[str, Any] | None
34
- tavily: dict[str, Any] | None
35
- openai: dict[str, Any] | None
35
+ openai: dict[str, Any] | Literal[True]
36
+ anthropic: dict[str, Any] | Literal[True]
37
+ gemini: dict[str, Any] | Literal[True]
38
+ tavily: dict[str, Any] | Literal[True]
39
+ google: dict[str, Any] | Literal[True]
40
+ exa: dict[str, Any] | Literal[True]
41
+
42
+
43
+ class _NormalizedProviders(TypedDict, total=False):
44
+ openai: dict[str, Any]
45
+ anthropic: dict[str, Any]
46
+ gemini: dict[str, Any]
47
+ tavily: dict[str, Any]
48
+ google: dict[str, Any]
49
+ exa: dict[str, Any]
36
50
 
37
51
 
38
52
  class WebSearchDeprecatedArgs(TypedDict, total=False):
@@ -53,13 +67,13 @@ def web_search(
53
67
  Web searches are executed using a provider. Providers are split
54
68
  into two categories:
55
69
 
56
- - Internal providers: "openai" - these use the model's built-in search
57
- capability and do not require separate API keys. These work only for
70
+ - Internal providers: "openai", "anthropic" - these use the model's built-in
71
+ search capability and do not require separate API keys. These work only for
58
72
  their respective model provider (e.g. the "openai" search provider
59
73
  works only for `openai/*` models).
60
74
 
61
- - External providers: "tavily" and "google". These are external services
62
- that work with any m odel and require separate accounts and API keys.
75
+ - External providers: "tavily", "google", and "exa". These are external services
76
+ that work with any model and require separate accounts and API keys.
63
77
 
64
78
  Internal providers will be prioritized if running on the corresponding model
65
79
  (e.g., "openai" provider will be used when running on `openai` models). If an
@@ -70,12 +84,12 @@ def web_search(
70
84
 
71
85
  Args:
72
86
  providers: Configuration for the search providers to use. Currently supported
73
- providers are "openai","tavily", and "google", The `providers` parameter
74
- supports several formats based on either a `str` specifying a provider or
75
- a `dict` whose keys are the provider names and whose values are the
76
- provider-specific options. A single value or a list of these can be passed.
77
- This arg is optional just for backwards compatibility. New code should
78
- always provide this argument.
87
+ providers are "openai", "anthropic", "tavily", "google", and "exa". The
88
+ `providers` parameter supports several formats based on either a `str`
89
+ specifying a provider or a `dict` whose keys are the provider names and
90
+ whose values are the provider-specific options. A single value or a list
91
+ of these can be passed. This arg is optional just for backwards compatibility.
92
+ New code should always provide this argument.
79
93
 
80
94
  Single provider:
81
95
  ```
@@ -88,8 +102,8 @@ def web_search(
88
102
  # "openai" used for OpenAI models, "tavily" as fallback
89
103
  web_search(["openai", "tavily"])
90
104
 
91
- # The None value means to use the provider with default options
92
- web_search({"openai": None, "tavily": {"max_results": 5}}
105
+ # The True value means to use the provider with default options
106
+ web_search({"openai": True, "tavily": {"max_results": 5}}
93
107
  ```
94
108
 
95
109
  Mixed format:
@@ -104,9 +118,15 @@ def web_search(
104
118
  - openai: Supports OpenAI's web search parameters.
105
119
  See https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses
106
120
 
121
+ - anthropic: Supports Anthropic's web search parameters.
122
+ See https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-search-tool#tool-definition
123
+
107
124
  - tavily: Supports options like `max_results`, `search_depth`, etc.
108
125
  See https://docs.tavily.com/documentation/api-reference/endpoint/search
109
126
 
127
+ - exa: Supports options like `text`, `model`, etc.
128
+ See https://docs.exa.ai/reference/answer
129
+
110
130
  - google: Supports options like `num_results`, `max_provider_calls`,
111
131
  `max_connections`, and `model`
112
132
 
@@ -117,7 +137,7 @@ def web_search(
117
137
  """
118
138
  normalized_providers = _normalize_config(providers, **deprecated)
119
139
 
120
- search_provider: Callable[[str], Awaitable[str | None]] | None = None
140
+ search_provider: SearchProvider | None = None
121
141
 
122
142
  async def execute(query: str) -> ToolResult:
123
143
  """
@@ -131,13 +151,17 @@ def web_search(
131
151
  search_provider = _create_external_provider(normalized_providers)
132
152
  search_result = await search_provider(query)
133
153
 
154
+ # This is gunky here because ToolResult is typed with a List rather than
155
+ # a Sequence, and Lists are variant (rather than covariant). This means
156
+ # it's illegal to assign a List of a narrower type to a List of a broader
157
+ # type. By making a copy of the list and not capturing an alias to it,
158
+ # mypy knows it's safe.
134
159
  return (
135
- (
136
- "Here are your web search results. Please read them carefully as they may be useful later!\n"
137
- + search_result
138
- )
139
- if search_result
140
- else ("I'm sorry, I couldn't find any relevant information on the web.")
160
+ list(search_result)
161
+ if isinstance(search_result, list)
162
+ else search_result
163
+ if search_result is not None
164
+ else "I couldn't find any relevant information on the web."
141
165
  )
142
166
 
143
167
  return ToolDef(
@@ -148,7 +172,7 @@ def web_search(
148
172
  def _normalize_config(
149
173
  providers: Provider | Providers | list[Provider | Providers] | None,
150
174
  **deprecated: Unpack[WebSearchDeprecatedArgs],
151
- ) -> Providers:
175
+ ) -> _NormalizedProviders:
152
176
  """
153
177
  Deal with breaking changes in the web_search parameter list.
154
178
 
@@ -191,34 +215,48 @@ def _normalize_config(
191
215
  )
192
216
 
193
217
  assert providers, "providers should not be None here"
194
- normalized: Providers = {}
218
+ normalized: _NormalizedProviders = {}
195
219
  for entry in providers if isinstance(providers, list) else [providers]:
196
220
  if isinstance(entry, str):
197
221
  if entry not in valid_providers:
198
222
  raise ValueError(f"Invalid provider: '{entry}'")
199
- normalized[entry] = None # type: ignore
223
+ normalized[entry] = {} # type: ignore
200
224
  else:
201
225
  for key, value in entry.items():
202
226
  if key not in valid_providers:
203
227
  raise ValueError(f"Invalid provider: '{key}'")
204
- normalized[key] = value # type: ignore
228
+
229
+ if (
230
+ not isinstance(value, dict)
231
+ and value is not True
232
+ and value is not None
233
+ ):
234
+ raise ValueError(
235
+ f"Invalid value for provider '{key}': {value}. Expected a dict, None, or True."
236
+ )
237
+ normalized[key] = value if isinstance(value, dict) else {} # type: ignore
205
238
  return normalized
206
239
 
207
240
 
208
241
  def _get_config_via_back_compat(
209
- provider: Literal["tavily", "google"],
242
+ provider: Literal["tavily", "google", "exa"],
210
243
  num_results: int | None,
211
244
  max_provider_calls: int | None,
212
245
  max_connections: int | None,
213
246
  model: str | None,
214
- ) -> Providers:
247
+ ) -> _NormalizedProviders:
215
248
  if (
216
249
  num_results is None
217
250
  and max_provider_calls is None
218
251
  and max_connections is None
219
252
  and model is None
220
253
  ):
221
- return {"google": None} if provider == "google" else {"tavily": None}
254
+ if provider == "google":
255
+ return {"google": {}}
256
+ elif provider == "exa":
257
+ return {"exa": {}}
258
+ else:
259
+ return {"tavily": {}}
222
260
 
223
261
  # If we get here, we have at least one old school parameter
224
262
  deprecation_warning(
@@ -234,6 +272,12 @@ def _get_config_via_back_compat(
234
272
  model=model,
235
273
  ).model_dump(exclude_none=True)
236
274
  }
275
+ elif provider == "exa":
276
+ return {
277
+ "exa": ExaOptions(max_connections=max_connections).model_dump(
278
+ exclude_none=True
279
+ )
280
+ }
237
281
  else:
238
282
  return {
239
283
  "tavily": TavilyOptions(
@@ -243,12 +287,15 @@ def _get_config_via_back_compat(
243
287
 
244
288
 
245
289
  def _create_external_provider(
246
- providers: Providers,
247
- ) -> Callable[[str], Awaitable[str | None]]:
290
+ providers: _NormalizedProviders,
291
+ ) -> SearchProvider:
248
292
  if "tavily" in providers:
249
- return tavily_search_provider(providers.get("tavily", None))
293
+ return tavily_search_provider(providers.get("tavily"))
294
+
295
+ if "exa" in providers:
296
+ return exa_search_provider(providers.get("exa"))
250
297
 
251
298
  if "google" in providers:
252
- return google_search_provider(providers.get("google", None))
299
+ return google_search_provider(providers.get("google"))
253
300
 
254
301
  raise ValueError("No valid provider found.")
@@ -0,0 +1,7 @@
1
+ from typing import Awaitable, Callable, TypeAlias
2
+
3
+ from inspect_ai._util.content import ContentText
4
+
5
+ SearchProvider: TypeAlias = Callable[
6
+ [str], Awaitable[str | ContentText | list[ContentText] | None]
7
+ ]
@@ -8,7 +8,7 @@ from inspect_ai._util.thread import is_main_thread
8
8
 
9
9
  logger = getLogger(__name__)
10
10
 
11
- DisplayType = Literal["full", "conversation", "rich", "plain", "none"]
11
+ DisplayType = Literal["full", "conversation", "rich", "plain", "log", "none"]
12
12
  """Console display type."""
13
13
 
14
14
 
@@ -34,7 +34,7 @@ def init_display_type(display: str | None = None) -> DisplayType:
34
34
  display = "plain"
35
35
 
36
36
  match display:
37
- case "full" | "conversation" | "rich" | "plain" | "none":
37
+ case "full" | "conversation" | "rich" | "plain" | "log" | "none":
38
38
  _display_type = display
39
39
  case _:
40
40
  logger.warning(
@@ -57,6 +57,15 @@ def display_type() -> DisplayType:
57
57
  return init_display_type()
58
58
 
59
59
 
60
+ def display_type_plain() -> bool:
61
+ """Does the current display type prefer plain text?
62
+
63
+ Returns:
64
+ bool: True if the display type is "plain" or "log".
65
+ """
66
+ return display_type() in ["plain", "log"]
67
+
68
+
60
69
  def display_type_initialized() -> bool:
61
70
  global _display_type
62
71
  return _display_type is not None
@@ -11,7 +11,7 @@ from pydantic import BaseModel
11
11
  from inspect_ai._util.error import PrerequisiteError
12
12
  from inspect_ai._util.trace import trace_message
13
13
  from inspect_ai.util._concurrency import concurrency
14
- from inspect_ai.util._display import display_type
14
+ from inspect_ai.util._display import display_type, display_type_plain
15
15
  from inspect_ai.util._subprocess import ExecResult, subprocess
16
16
 
17
17
  from .prereqs import (
@@ -285,7 +285,7 @@ async def compose_command(
285
285
  env = project.env if (project.env and forward_env) else {}
286
286
 
287
287
  # ansi (apply global override)
288
- if display_type() == "plain":
288
+ if display_type_plain():
289
289
  ansi = "never"
290
290
  if ansi:
291
291
  compose_command = compose_command + ["--ansi", ansi]
inspect_ai/util/_span.py CHANGED
@@ -1,8 +1,12 @@
1
1
  import contextlib
2
+ import inspect
2
3
  from contextvars import ContextVar
4
+ from logging import getLogger
3
5
  from typing import AsyncIterator
4
6
  from uuid import uuid4
5
7
 
8
+ logger = getLogger(__name__)
9
+
6
10
 
7
11
  @contextlib.asynccontextmanager
8
12
  async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
@@ -22,6 +26,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
22
26
  # span id
23
27
  id = uuid4().hex
24
28
 
29
+ # span caller context
30
+ frame = inspect.stack()[1]
31
+ caller = f"{frame.function}() [{frame.filename}:{frame.lineno}]"
32
+
25
33
  # capture parent id
26
34
  parent_id = _current_span_id.get()
27
35
 
@@ -48,7 +56,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
48
56
  # send end event
49
57
  transcript()._event(SpanEndEvent(id=id))
50
58
 
51
- _current_span_id.reset(token)
59
+ try:
60
+ _current_span_id.reset(token)
61
+ except ValueError:
62
+ logger.warning(f"Exiting span created in another context: {caller}")
52
63
 
53
64
 
54
65
  def current_span_id() -> str | None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.103
3
+ Version: 0.3.104
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
63
63
  Requires-Dist: ipython; extra == "dev"
64
64
  Requires-Dist: jsonpath-ng; extra == "dev"
65
65
  Requires-Dist: markdown; extra == "dev"
66
- Requires-Dist: mcp; extra == "dev"
66
+ Requires-Dist: mcp>=1.9.4; extra == "dev"
67
67
  Requires-Dist: mistralai; extra == "dev"
68
68
  Requires-Dist: moto[server]; extra == "dev"
69
69
  Requires-Dist: mypy>=1.16.0; extra == "dev"