inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +2 -2
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +1 -1
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/www/dist/assets/index.css +82 -24
- inspect_ai/_view/www/dist/assets/index.js +10124 -9808
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +3 -3
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +196 -34
- inspect_ai/model/_providers/google.py +94 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_span.py +12 -1
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,4 @@
|
|
1
1
|
import os
|
2
|
-
from typing import Awaitable, Callable
|
3
2
|
|
4
3
|
import anyio
|
5
4
|
import httpx
|
@@ -13,10 +12,14 @@ from tenacity import (
|
|
13
12
|
wait_exponential_jitter,
|
14
13
|
)
|
15
14
|
|
15
|
+
from inspect_ai._util.citation import UrlCitation
|
16
|
+
from inspect_ai._util.content import ContentText
|
16
17
|
from inspect_ai._util.error import PrerequisiteError
|
17
18
|
from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
|
18
19
|
from inspect_ai.util._concurrency import concurrency
|
19
20
|
|
21
|
+
from ._web_search_provider import SearchProvider
|
22
|
+
|
20
23
|
DEFAULT_RELEVANCE_PROMPT = """I am trying to answer the following question and need to find the most relevant information on the web. Please let me know if the following content is relevant to the question or not. You should just respond with "yes" or "no".
|
21
24
|
|
22
25
|
Question: {question}
|
@@ -52,7 +55,7 @@ def maybe_get_google_api_keys() -> tuple[str, str] | None:
|
|
52
55
|
|
53
56
|
def google_search_provider(
|
54
57
|
in_options: dict[str, object] | None = None,
|
55
|
-
) ->
|
58
|
+
) -> SearchProvider:
|
56
59
|
options = GoogleOptions.model_validate(in_options) if in_options else None
|
57
60
|
num_results = (options.num_results if options else None) or 3
|
58
61
|
max_provider_calls = (options.max_provider_calls if options else None) or 3
|
@@ -69,14 +72,13 @@ def google_search_provider(
|
|
69
72
|
# Create the client within the provider
|
70
73
|
client = httpx.AsyncClient()
|
71
74
|
|
72
|
-
async def search(query: str) ->
|
75
|
+
async def search(query: str) -> list[ContentText] | None:
|
73
76
|
# limit number of concurrent searches
|
74
|
-
|
75
|
-
processed_links: list[SearchLink] = []
|
77
|
+
results: list[ContentText] = []
|
76
78
|
search_calls = 0
|
77
79
|
|
78
80
|
# Paginate through search results until we have successfully extracted num_results pages or we have reached max_provider_calls
|
79
|
-
while len(
|
81
|
+
while len(results) < num_results and search_calls < max_provider_calls:
|
80
82
|
async with concurrency("google_web_search", max_connections):
|
81
83
|
links = await _search(query, start_idx=search_calls * 10)
|
82
84
|
|
@@ -84,10 +86,10 @@ def google_search_provider(
|
|
84
86
|
|
85
87
|
async def process_link(link: SearchLink) -> None:
|
86
88
|
try:
|
87
|
-
page
|
88
|
-
|
89
|
-
|
90
|
-
|
89
|
+
if page := await page_if_relevant(
|
90
|
+
link.url, query, model, client
|
91
|
+
):
|
92
|
+
results.append(page)
|
91
93
|
# exceptions fetching pages are very common!
|
92
94
|
except Exception:
|
93
95
|
pass
|
@@ -97,18 +99,7 @@ def google_search_provider(
|
|
97
99
|
|
98
100
|
search_calls += 1
|
99
101
|
|
100
|
-
return
|
101
|
-
"\n\n".join(
|
102
|
-
"[{title}]({url}):\n{page_content}".format(
|
103
|
-
title=link.title, url=link.url, page_content=page_content
|
104
|
-
)
|
105
|
-
for link, page_content in zip(
|
106
|
-
processed_links, page_contents, strict=True
|
107
|
-
)
|
108
|
-
)
|
109
|
-
if processed_links
|
110
|
-
else None
|
111
|
-
)
|
102
|
+
return results or None
|
112
103
|
|
113
104
|
async def _search(query: str, start_idx: int) -> list[SearchLink]:
|
114
105
|
# List of allowed parameters can be found https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
|
@@ -153,7 +144,7 @@ def google_search_provider(
|
|
153
144
|
|
154
145
|
async def page_if_relevant(
|
155
146
|
url: str, query: str, relevance_model: str | None, client: httpx.AsyncClient
|
156
|
-
) ->
|
147
|
+
) -> ContentText | None:
|
157
148
|
"""
|
158
149
|
Use parser model to determine if a web page contents is relevant to a query.
|
159
150
|
|
@@ -181,13 +172,16 @@ async def page_if_relevant(
|
|
181
172
|
# parse it
|
182
173
|
encoding_scheme = response.encoding or "utf-8"
|
183
174
|
soup = BeautifulSoup(response.content.decode(encoding_scheme), "html.parser")
|
175
|
+
page_title = soup.title.get_text(strip=True) if soup.title else None
|
184
176
|
|
185
177
|
main_content = soup.find("main") or soup.find("body") or soup
|
186
178
|
if not isinstance(main_content, NavigableString):
|
187
179
|
paragraphs = main_content.find_all("p")
|
188
180
|
full_text = ""
|
189
181
|
for p in paragraphs:
|
190
|
-
full_text +=
|
182
|
+
full_text += ("\n" if full_text else "") + p.get_text(
|
183
|
+
strip=True, separator=" "
|
184
|
+
)
|
191
185
|
if len(full_text.split()) > 2000:
|
192
186
|
break
|
193
187
|
else:
|
@@ -202,6 +196,9 @@ async def page_if_relevant(
|
|
202
196
|
).message.text
|
203
197
|
|
204
198
|
if "yes" in is_relevant.lower():
|
205
|
-
return
|
199
|
+
return ContentText(
|
200
|
+
text=(f"{page_title}\n" if page_title else "") + full_text,
|
201
|
+
citations=[UrlCitation(url=url, title=page_title)],
|
202
|
+
)
|
206
203
|
else:
|
207
204
|
return None
|
@@ -1,19 +1,12 @@
|
|
1
|
-
import
|
2
|
-
from typing import Awaitable, Callable, Literal
|
1
|
+
from typing import Any, Literal
|
3
2
|
|
4
|
-
import httpx
|
5
3
|
from pydantic import BaseModel, Field
|
6
|
-
from tenacity import (
|
7
|
-
retry,
|
8
|
-
retry_if_exception,
|
9
|
-
stop_after_attempt,
|
10
|
-
stop_after_delay,
|
11
|
-
wait_exponential_jitter,
|
12
|
-
)
|
13
4
|
|
14
|
-
from inspect_ai._util.
|
15
|
-
from inspect_ai._util.
|
16
|
-
|
5
|
+
from inspect_ai._util.citation import UrlCitation
|
6
|
+
from inspect_ai._util.content import ContentText
|
7
|
+
|
8
|
+
from ._base_http_provider import BaseHttpProvider
|
9
|
+
from ._web_search_provider import SearchProvider
|
17
10
|
|
18
11
|
|
19
12
|
class TavilyOptions(BaseModel):
|
@@ -50,61 +43,50 @@ class TavilySearchResponse(BaseModel):
|
|
50
43
|
response_time: float
|
51
44
|
|
52
45
|
|
53
|
-
|
54
|
-
|
55
|
-
) -> Callable[[str], Awaitable[str | None]]:
|
56
|
-
options = TavilyOptions.model_validate(in_options) if in_options else None
|
57
|
-
# Separate max_connections (which is an inspect thing) from the rest of the
|
58
|
-
# options which will be passed in the request body
|
59
|
-
max_connections = (options.max_connections if options else None) or 10
|
60
|
-
api_options = (
|
61
|
-
options.model_dump(exclude={"max_connections"}, exclude_none=True)
|
62
|
-
if options
|
63
|
-
else {}
|
64
|
-
)
|
65
|
-
if not api_options.get("include_answer", False):
|
66
|
-
api_options["include_answer"] = True
|
46
|
+
class TavilySearchProvider(BaseHttpProvider):
|
47
|
+
"""Tavily-specific implementation of HttpSearchProvider."""
|
67
48
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
"
|
49
|
+
def __init__(self, options: dict[str, Any] | None = None):
|
50
|
+
super().__init__(
|
51
|
+
env_key_name="TAVILY_API_KEY",
|
52
|
+
api_endpoint="https://api.tavily.com/search",
|
53
|
+
provider_name="Tavily",
|
54
|
+
concurrency_key="tavily_web_search",
|
55
|
+
options=options,
|
72
56
|
)
|
73
57
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
async def search(query: str) -> str | None:
|
78
|
-
# See https://docs.tavily.com/documentation/api-reference/endpoint/search
|
79
|
-
search_url = "https://api.tavily.com/search"
|
80
|
-
headers = {
|
81
|
-
"Authorization": f"Bearer {tavily_api_key}",
|
58
|
+
def prepare_headers(self, api_key: str) -> dict[str, str]:
|
59
|
+
return {
|
60
|
+
"Authorization": f"Bearer {api_key}",
|
82
61
|
}
|
83
62
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
63
|
+
def set_default_options(self, options: dict[str, Any]) -> dict[str, Any]:
|
64
|
+
# Force inclusion of answer if not specified
|
65
|
+
new_options = options.copy()
|
66
|
+
new_options["include_answer"] = True
|
67
|
+
return new_options
|
68
|
+
|
69
|
+
def parse_response(self, response_data: dict[str, Any]) -> ContentText | None:
|
70
|
+
tavily_search_response = TavilySearchResponse.model_validate(response_data)
|
71
|
+
|
72
|
+
if not tavily_search_response.results and not tavily_search_response.answer:
|
73
|
+
return None
|
74
|
+
|
75
|
+
return ContentText(
|
76
|
+
text=tavily_search_response.answer or "No answer found.",
|
77
|
+
citations=[
|
78
|
+
UrlCitation(
|
79
|
+
cited_text=result.content, title=result.title, url=result.url
|
80
|
+
)
|
81
|
+
for result in tavily_search_response.results
|
82
|
+
],
|
92
83
|
)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
results_str = "\n\n".join(
|
103
|
-
[
|
104
|
-
f"[{result.title}]({result.url}):\n{result.content}"
|
105
|
-
for result in tavily_search_response.results
|
106
|
-
]
|
107
|
-
)
|
108
|
-
return f"Answer: {tavily_search_response.answer}\n\n{results_str}"
|
109
|
-
|
110
|
-
return search
|
84
|
+
|
85
|
+
|
86
|
+
def tavily_search_provider(
|
87
|
+
in_options: dict[str, object] | None = None,
|
88
|
+
) -> SearchProvider:
|
89
|
+
options = TavilyOptions.model_validate(in_options) if in_options else None
|
90
|
+
return TavilySearchProvider(
|
91
|
+
options.model_dump(exclude_none=True) if options else None
|
92
|
+
).search
|
@@ -1,7 +1,5 @@
|
|
1
1
|
from typing import (
|
2
2
|
Any,
|
3
|
-
Awaitable,
|
4
|
-
Callable,
|
5
3
|
Literal,
|
6
4
|
TypeAlias,
|
7
5
|
TypedDict,
|
@@ -14,10 +12,14 @@ from inspect_ai._util.deprecation import deprecation_warning
|
|
14
12
|
from inspect_ai.tool._tool_def import ToolDef
|
15
13
|
|
16
14
|
from ..._tool import Tool, ToolResult, tool
|
15
|
+
from ._exa import ExaOptions, exa_search_provider
|
17
16
|
from ._google import GoogleOptions, google_search_provider
|
18
17
|
from ._tavily import TavilyOptions, tavily_search_provider
|
18
|
+
from ._web_search_provider import SearchProvider
|
19
19
|
|
20
|
-
Provider: TypeAlias = Literal[
|
20
|
+
Provider: TypeAlias = Literal[
|
21
|
+
"gemini", "openai", "anthropic", "tavily", "google", "exa"
|
22
|
+
]
|
21
23
|
valid_providers = set(get_args(Provider))
|
22
24
|
|
23
25
|
|
@@ -30,9 +32,21 @@ valid_providers = set(get_args(Provider))
|
|
30
32
|
# If the caller uses this dict form and uses a value of `None`, it means that
|
31
33
|
# they want to use that provider and to use the default options.
|
32
34
|
class Providers(TypedDict, total=False):
|
33
|
-
|
34
|
-
|
35
|
-
|
35
|
+
openai: dict[str, Any] | Literal[True]
|
36
|
+
anthropic: dict[str, Any] | Literal[True]
|
37
|
+
gemini: dict[str, Any] | Literal[True]
|
38
|
+
tavily: dict[str, Any] | Literal[True]
|
39
|
+
google: dict[str, Any] | Literal[True]
|
40
|
+
exa: dict[str, Any] | Literal[True]
|
41
|
+
|
42
|
+
|
43
|
+
class _NormalizedProviders(TypedDict, total=False):
|
44
|
+
openai: dict[str, Any]
|
45
|
+
anthropic: dict[str, Any]
|
46
|
+
gemini: dict[str, Any]
|
47
|
+
tavily: dict[str, Any]
|
48
|
+
google: dict[str, Any]
|
49
|
+
exa: dict[str, Any]
|
36
50
|
|
37
51
|
|
38
52
|
class WebSearchDeprecatedArgs(TypedDict, total=False):
|
@@ -53,13 +67,13 @@ def web_search(
|
|
53
67
|
Web searches are executed using a provider. Providers are split
|
54
68
|
into two categories:
|
55
69
|
|
56
|
-
- Internal providers: "openai" - these use the model's built-in
|
57
|
-
capability and do not require separate API keys. These work only for
|
70
|
+
- Internal providers: "openai", "anthropic" - these use the model's built-in
|
71
|
+
search capability and do not require separate API keys. These work only for
|
58
72
|
their respective model provider (e.g. the "openai" search provider
|
59
73
|
works only for `openai/*` models).
|
60
74
|
|
61
|
-
- External providers: "tavily" and "
|
62
|
-
that work with any
|
75
|
+
- External providers: "tavily", "google", and "exa". These are external services
|
76
|
+
that work with any model and require separate accounts and API keys.
|
63
77
|
|
64
78
|
Internal providers will be prioritized if running on the corresponding model
|
65
79
|
(e.g., "openai" provider will be used when running on `openai` models). If an
|
@@ -70,12 +84,12 @@ def web_search(
|
|
70
84
|
|
71
85
|
Args:
|
72
86
|
providers: Configuration for the search providers to use. Currently supported
|
73
|
-
providers are "openai","tavily",
|
74
|
-
supports several formats based on either a `str`
|
75
|
-
a `dict` whose keys are the provider names and
|
76
|
-
provider-specific options. A single value or a list
|
77
|
-
This arg is optional just for backwards compatibility.
|
78
|
-
always provide this argument.
|
87
|
+
providers are "openai", "anthropic", "tavily", "google", and "exa". The
|
88
|
+
`providers` parameter supports several formats based on either a `str`
|
89
|
+
specifying a provider or a `dict` whose keys are the provider names and
|
90
|
+
whose values are the provider-specific options. A single value or a list
|
91
|
+
of these can be passed. This arg is optional just for backwards compatibility.
|
92
|
+
New code should always provide this argument.
|
79
93
|
|
80
94
|
Single provider:
|
81
95
|
```
|
@@ -88,8 +102,8 @@ def web_search(
|
|
88
102
|
# "openai" used for OpenAI models, "tavily" as fallback
|
89
103
|
web_search(["openai", "tavily"])
|
90
104
|
|
91
|
-
# The
|
92
|
-
web_search({"openai":
|
105
|
+
# The True value means to use the provider with default options
|
106
|
+
web_search({"openai": True, "tavily": {"max_results": 5}}
|
93
107
|
```
|
94
108
|
|
95
109
|
Mixed format:
|
@@ -104,9 +118,15 @@ def web_search(
|
|
104
118
|
- openai: Supports OpenAI's web search parameters.
|
105
119
|
See https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses
|
106
120
|
|
121
|
+
- anthropic: Supports Anthropic's web search parameters.
|
122
|
+
See https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-search-tool#tool-definition
|
123
|
+
|
107
124
|
- tavily: Supports options like `max_results`, `search_depth`, etc.
|
108
125
|
See https://docs.tavily.com/documentation/api-reference/endpoint/search
|
109
126
|
|
127
|
+
- exa: Supports options like `text`, `model`, etc.
|
128
|
+
See https://docs.exa.ai/reference/answer
|
129
|
+
|
110
130
|
- google: Supports options like `num_results`, `max_provider_calls`,
|
111
131
|
`max_connections`, and `model`
|
112
132
|
|
@@ -117,7 +137,7 @@ def web_search(
|
|
117
137
|
"""
|
118
138
|
normalized_providers = _normalize_config(providers, **deprecated)
|
119
139
|
|
120
|
-
search_provider:
|
140
|
+
search_provider: SearchProvider | None = None
|
121
141
|
|
122
142
|
async def execute(query: str) -> ToolResult:
|
123
143
|
"""
|
@@ -131,13 +151,17 @@ def web_search(
|
|
131
151
|
search_provider = _create_external_provider(normalized_providers)
|
132
152
|
search_result = await search_provider(query)
|
133
153
|
|
154
|
+
# This is gunky here because ToolResult is typed with a List rather than
|
155
|
+
# a Sequence, and Lists are variant (rather than covariant). This means
|
156
|
+
# it's illegal to assign a List of a narrower type to a List of a broader
|
157
|
+
# type. By making a copy of the list and not capturing an alias to it,
|
158
|
+
# mypy knows it's safe.
|
134
159
|
return (
|
135
|
-
(
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
else ("I'm sorry, I couldn't find any relevant information on the web.")
|
160
|
+
list(search_result)
|
161
|
+
if isinstance(search_result, list)
|
162
|
+
else search_result
|
163
|
+
if search_result is not None
|
164
|
+
else "I couldn't find any relevant information on the web."
|
141
165
|
)
|
142
166
|
|
143
167
|
return ToolDef(
|
@@ -148,7 +172,7 @@ def web_search(
|
|
148
172
|
def _normalize_config(
|
149
173
|
providers: Provider | Providers | list[Provider | Providers] | None,
|
150
174
|
**deprecated: Unpack[WebSearchDeprecatedArgs],
|
151
|
-
) ->
|
175
|
+
) -> _NormalizedProviders:
|
152
176
|
"""
|
153
177
|
Deal with breaking changes in the web_search parameter list.
|
154
178
|
|
@@ -191,34 +215,48 @@ def _normalize_config(
|
|
191
215
|
)
|
192
216
|
|
193
217
|
assert providers, "providers should not be None here"
|
194
|
-
normalized:
|
218
|
+
normalized: _NormalizedProviders = {}
|
195
219
|
for entry in providers if isinstance(providers, list) else [providers]:
|
196
220
|
if isinstance(entry, str):
|
197
221
|
if entry not in valid_providers:
|
198
222
|
raise ValueError(f"Invalid provider: '{entry}'")
|
199
|
-
normalized[entry] =
|
223
|
+
normalized[entry] = {} # type: ignore
|
200
224
|
else:
|
201
225
|
for key, value in entry.items():
|
202
226
|
if key not in valid_providers:
|
203
227
|
raise ValueError(f"Invalid provider: '{key}'")
|
204
|
-
|
228
|
+
|
229
|
+
if (
|
230
|
+
not isinstance(value, dict)
|
231
|
+
and value is not True
|
232
|
+
and value is not None
|
233
|
+
):
|
234
|
+
raise ValueError(
|
235
|
+
f"Invalid value for provider '{key}': {value}. Expected a dict, None, or True."
|
236
|
+
)
|
237
|
+
normalized[key] = value if isinstance(value, dict) else {} # type: ignore
|
205
238
|
return normalized
|
206
239
|
|
207
240
|
|
208
241
|
def _get_config_via_back_compat(
|
209
|
-
provider: Literal["tavily", "google"],
|
242
|
+
provider: Literal["tavily", "google", "exa"],
|
210
243
|
num_results: int | None,
|
211
244
|
max_provider_calls: int | None,
|
212
245
|
max_connections: int | None,
|
213
246
|
model: str | None,
|
214
|
-
) ->
|
247
|
+
) -> _NormalizedProviders:
|
215
248
|
if (
|
216
249
|
num_results is None
|
217
250
|
and max_provider_calls is None
|
218
251
|
and max_connections is None
|
219
252
|
and model is None
|
220
253
|
):
|
221
|
-
|
254
|
+
if provider == "google":
|
255
|
+
return {"google": {}}
|
256
|
+
elif provider == "exa":
|
257
|
+
return {"exa": {}}
|
258
|
+
else:
|
259
|
+
return {"tavily": {}}
|
222
260
|
|
223
261
|
# If we get here, we have at least one old school parameter
|
224
262
|
deprecation_warning(
|
@@ -234,6 +272,12 @@ def _get_config_via_back_compat(
|
|
234
272
|
model=model,
|
235
273
|
).model_dump(exclude_none=True)
|
236
274
|
}
|
275
|
+
elif provider == "exa":
|
276
|
+
return {
|
277
|
+
"exa": ExaOptions(max_connections=max_connections).model_dump(
|
278
|
+
exclude_none=True
|
279
|
+
)
|
280
|
+
}
|
237
281
|
else:
|
238
282
|
return {
|
239
283
|
"tavily": TavilyOptions(
|
@@ -243,12 +287,15 @@ def _get_config_via_back_compat(
|
|
243
287
|
|
244
288
|
|
245
289
|
def _create_external_provider(
|
246
|
-
providers:
|
247
|
-
) ->
|
290
|
+
providers: _NormalizedProviders,
|
291
|
+
) -> SearchProvider:
|
248
292
|
if "tavily" in providers:
|
249
|
-
return tavily_search_provider(providers.get("tavily"
|
293
|
+
return tavily_search_provider(providers.get("tavily"))
|
294
|
+
|
295
|
+
if "exa" in providers:
|
296
|
+
return exa_search_provider(providers.get("exa"))
|
250
297
|
|
251
298
|
if "google" in providers:
|
252
|
-
return google_search_provider(providers.get("google"
|
299
|
+
return google_search_provider(providers.get("google"))
|
253
300
|
|
254
301
|
raise ValueError("No valid provider found.")
|
inspect_ai/util/_display.py
CHANGED
@@ -8,7 +8,7 @@ from inspect_ai._util.thread import is_main_thread
|
|
8
8
|
|
9
9
|
logger = getLogger(__name__)
|
10
10
|
|
11
|
-
DisplayType = Literal["full", "conversation", "rich", "plain", "none"]
|
11
|
+
DisplayType = Literal["full", "conversation", "rich", "plain", "log", "none"]
|
12
12
|
"""Console display type."""
|
13
13
|
|
14
14
|
|
@@ -34,7 +34,7 @@ def init_display_type(display: str | None = None) -> DisplayType:
|
|
34
34
|
display = "plain"
|
35
35
|
|
36
36
|
match display:
|
37
|
-
case "full" | "conversation" | "rich" | "plain" | "none":
|
37
|
+
case "full" | "conversation" | "rich" | "plain" | "log" | "none":
|
38
38
|
_display_type = display
|
39
39
|
case _:
|
40
40
|
logger.warning(
|
@@ -57,6 +57,15 @@ def display_type() -> DisplayType:
|
|
57
57
|
return init_display_type()
|
58
58
|
|
59
59
|
|
60
|
+
def display_type_plain() -> bool:
|
61
|
+
"""Does the current display type prefer plain text?
|
62
|
+
|
63
|
+
Returns:
|
64
|
+
bool: True if the display type is "plain" or "log".
|
65
|
+
"""
|
66
|
+
return display_type() in ["plain", "log"]
|
67
|
+
|
68
|
+
|
60
69
|
def display_type_initialized() -> bool:
|
61
70
|
global _display_type
|
62
71
|
return _display_type is not None
|
@@ -11,7 +11,7 @@ from pydantic import BaseModel
|
|
11
11
|
from inspect_ai._util.error import PrerequisiteError
|
12
12
|
from inspect_ai._util.trace import trace_message
|
13
13
|
from inspect_ai.util._concurrency import concurrency
|
14
|
-
from inspect_ai.util._display import display_type
|
14
|
+
from inspect_ai.util._display import display_type, display_type_plain
|
15
15
|
from inspect_ai.util._subprocess import ExecResult, subprocess
|
16
16
|
|
17
17
|
from .prereqs import (
|
@@ -285,7 +285,7 @@ async def compose_command(
|
|
285
285
|
env = project.env if (project.env and forward_env) else {}
|
286
286
|
|
287
287
|
# ansi (apply global override)
|
288
|
-
if
|
288
|
+
if display_type_plain():
|
289
289
|
ansi = "never"
|
290
290
|
if ansi:
|
291
291
|
compose_command = compose_command + ["--ansi", ansi]
|
inspect_ai/util/_span.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
import contextlib
|
2
|
+
import inspect
|
2
3
|
from contextvars import ContextVar
|
4
|
+
from logging import getLogger
|
3
5
|
from typing import AsyncIterator
|
4
6
|
from uuid import uuid4
|
5
7
|
|
8
|
+
logger = getLogger(__name__)
|
9
|
+
|
6
10
|
|
7
11
|
@contextlib.asynccontextmanager
|
8
12
|
async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
@@ -22,6 +26,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
|
22
26
|
# span id
|
23
27
|
id = uuid4().hex
|
24
28
|
|
29
|
+
# span caller context
|
30
|
+
frame = inspect.stack()[1]
|
31
|
+
caller = f"{frame.function}() [{frame.filename}:{frame.lineno}]"
|
32
|
+
|
25
33
|
# capture parent id
|
26
34
|
parent_id = _current_span_id.get()
|
27
35
|
|
@@ -48,7 +56,10 @@ async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
|
|
48
56
|
# send end event
|
49
57
|
transcript()._event(SpanEndEvent(id=id))
|
50
58
|
|
51
|
-
|
59
|
+
try:
|
60
|
+
_current_span_id.reset(token)
|
61
|
+
except ValueError:
|
62
|
+
logger.warning(f"Exiting span created in another context: {caller}")
|
52
63
|
|
53
64
|
|
54
65
|
def current_span_id() -> str | None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.104
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -63,7 +63,7 @@ Requires-Dist: groq; extra == "dev"
|
|
63
63
|
Requires-Dist: ipython; extra == "dev"
|
64
64
|
Requires-Dist: jsonpath-ng; extra == "dev"
|
65
65
|
Requires-Dist: markdown; extra == "dev"
|
66
|
-
Requires-Dist: mcp; extra == "dev"
|
66
|
+
Requires-Dist: mcp>=1.9.4; extra == "dev"
|
67
67
|
Requires-Dist: mistralai; extra == "dev"
|
68
68
|
Requires-Dist: moto[server]; extra == "dev"
|
69
69
|
Requires-Dist: mypy>=1.16.0; extra == "dev"
|