inspect-ai 0.3.73__py3-none-any.whl → 0.3.75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +3 -2
- inspect_ai/_cli/cache.py +1 -1
- inspect_ai/_cli/common.py +15 -0
- inspect_ai/_cli/eval.py +4 -5
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_cli/sandbox.py +1 -1
- inspect_ai/_cli/trace.py +1 -1
- inspect_ai/_cli/view.py +1 -1
- inspect_ai/_display/core/config.py +3 -1
- inspect_ai/_eval/eval.py +55 -61
- inspect_ai/_eval/evalset.py +63 -154
- inspect_ai/_eval/loader.py +27 -54
- inspect_ai/_eval/registry.py +1 -10
- inspect_ai/_eval/run.py +3 -4
- inspect_ai/_eval/task/__init__.py +8 -2
- inspect_ai/_eval/task/log.py +9 -1
- inspect_ai/_eval/task/resolved.py +35 -0
- inspect_ai/_eval/task/task.py +50 -69
- inspect_ai/_eval/task/tasks.py +30 -0
- inspect_ai/_util/constants.py +3 -0
- inspect_ai/_util/dotenv.py +17 -0
- inspect_ai/_util/registry.py +43 -2
- inspect_ai/_view/server.py +28 -10
- inspect_ai/_view/www/dist/assets/index.css +4 -3
- inspect_ai/_view/www/dist/assets/index.js +13030 -25523
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/appearance/styles.ts +6 -5
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
- inspect_ai/_view/www/src/constants.ts +3 -0
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
- inspect_ai/_view/www/yarn.lock +12 -5
- inspect_ai/log/_log.py +10 -1
- inspect_ai/log/_recorders/eval.py +27 -8
- inspect_ai/log/_recorders/json.py +2 -2
- inspect_ai/model/_cache.py +3 -1
- inspect_ai/model/_chat_message.py +12 -1
- inspect_ai/model/_model.py +25 -11
- inspect_ai/model/_providers/anthropic.py +34 -2
- inspect_ai/model/_providers/google.py +6 -2
- inspect_ai/model/_providers/none.py +31 -0
- inspect_ai/model/_providers/providers.py +7 -0
- inspect_ai/solver/_bridge/bridge.py +1 -1
- inspect_ai/solver/_chain.py +7 -6
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
- inspect_ai/tool/_tools/_web_search.py +2 -2
- inspect_ai/util/_sandbox/context.py +2 -1
- inspect_ai/util/_sandbox/environment.py +17 -2
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/METADATA +4 -4
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/RECORD +63 -60
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ from typing import Any, Literal, Type, Union
|
|
4
4
|
from pydantic import BaseModel, Field, model_validator
|
5
5
|
from shortuuid import uuid
|
6
6
|
|
7
|
+
from inspect_ai._util.constants import DESERIALIZING
|
7
8
|
from inspect_ai._util.content import Content, ContentReasoning, ContentText
|
8
9
|
from inspect_ai.tool import ToolCall
|
9
10
|
from inspect_ai.tool._tool_call import ToolCallError
|
@@ -16,7 +17,7 @@ logger = getLogger(__name__)
|
|
16
17
|
class ChatMessageBase(BaseModel):
|
17
18
|
"""Base class for chat messages."""
|
18
19
|
|
19
|
-
id: str = Field(
|
20
|
+
id: str | None = Field(default=None)
|
20
21
|
"""Unique identifer for message."""
|
21
22
|
|
22
23
|
content: str | list[Content]
|
@@ -25,6 +26,16 @@ class ChatMessageBase(BaseModel):
|
|
25
26
|
source: Literal["input", "generate"] | None = Field(default=None)
|
26
27
|
"""Source of message."""
|
27
28
|
|
29
|
+
def model_post_init(self, __context: Any) -> None:
|
30
|
+
# check if deserializing
|
31
|
+
is_deserializing = isinstance(__context, dict) and __context.get(
|
32
|
+
DESERIALIZING, False
|
33
|
+
)
|
34
|
+
|
35
|
+
# Generate ID if needed and not deserializing
|
36
|
+
if self.id is None and not is_deserializing:
|
37
|
+
self.id = uuid()
|
38
|
+
|
28
39
|
@property
|
29
40
|
def text(self) -> str:
|
30
41
|
"""Get the text content of this message.
|
inspect_ai/model/_model.py
CHANGED
@@ -33,6 +33,7 @@ from inspect_ai._util.content import (
|
|
33
33
|
from inspect_ai._util.hooks import init_hooks, override_api_key, send_telemetry
|
34
34
|
from inspect_ai._util.interrupt import check_sample_interrupt
|
35
35
|
from inspect_ai._util.logger import warn_once
|
36
|
+
from inspect_ai._util.notgiven import NOT_GIVEN, NotGiven
|
36
37
|
from inspect_ai._util.platform import platform_init
|
37
38
|
from inspect_ai._util.registry import (
|
38
39
|
RegistryInfo,
|
@@ -77,7 +78,7 @@ class ModelAPI(abc.ABC):
|
|
77
78
|
by the user. You can then pass these on to the approriate place in
|
78
79
|
your model initialisation code (for example, here is what many
|
79
80
|
of the built-in providers do with the `model_args` passed to them:
|
80
|
-
https://inspect.
|
81
|
+
https://inspect.aisi.org.uk/models.html#model-args)
|
81
82
|
"""
|
82
83
|
|
83
84
|
def __init__(
|
@@ -232,15 +233,19 @@ class Model:
|
|
232
233
|
config: GenerateConfig
|
233
234
|
"""Generation config."""
|
234
235
|
|
235
|
-
def __init__(
|
236
|
+
def __init__(
|
237
|
+
self, api: ModelAPI, config: GenerateConfig, model_args: dict[str, Any] = {}
|
238
|
+
) -> None:
|
236
239
|
"""Create a model.
|
237
240
|
|
238
241
|
Args:
|
239
242
|
api: Model API provider.
|
240
243
|
config: Model configuration.
|
244
|
+
model_args: Optional model args
|
241
245
|
"""
|
242
246
|
self.api = api
|
243
247
|
self.config = config
|
248
|
+
self.model_args = model_args
|
244
249
|
|
245
250
|
# state indicating whether our lifetime is bound by a context manager
|
246
251
|
self._context_bound = False
|
@@ -773,6 +778,10 @@ def get_model(
|
|
773
778
|
if isinstance(model, Model):
|
774
779
|
return model
|
775
780
|
|
781
|
+
# next see if this is the special "none" model
|
782
|
+
if model == "none":
|
783
|
+
model = "none/none"
|
784
|
+
|
776
785
|
# now try finding an 'ambient' model (active or env var)
|
777
786
|
if model is None:
|
778
787
|
# return active_model if there is one
|
@@ -835,7 +844,7 @@ def get_model(
|
|
835
844
|
config=config,
|
836
845
|
**model_args,
|
837
846
|
)
|
838
|
-
m = Model(modelapi_instance, config)
|
847
|
+
m = Model(modelapi_instance, config, model_args)
|
839
848
|
if memoize:
|
840
849
|
_models[model_cache_key] = m
|
841
850
|
return m
|
@@ -860,17 +869,25 @@ def cached_model(key: str) -> Model | None:
|
|
860
869
|
|
861
870
|
|
862
871
|
def resolve_models(
|
863
|
-
model: str | Model | list[str] | list[Model] | None,
|
872
|
+
model: str | Model | list[str] | list[Model] | None | NotGiven = NOT_GIVEN,
|
864
873
|
model_base_url: str | None = None,
|
865
874
|
model_args: dict[str, Any] = dict(),
|
866
875
|
config: GenerateConfig = GenerateConfig(),
|
867
876
|
) -> list[Model]:
|
877
|
+
# resolve NotGiven to current INSPECT_EVAL_MODEL
|
878
|
+
if isinstance(model, NotGiven):
|
879
|
+
model = os.getenv("INSPECT_EVAL_MODEL", None)
|
880
|
+
|
881
|
+
# resolve None to NoModel
|
882
|
+
if model is None:
|
883
|
+
return [get_model("none")]
|
884
|
+
|
868
885
|
# reflect back a plain model
|
869
886
|
if isinstance(model, Model):
|
870
887
|
return [model]
|
871
888
|
|
872
889
|
# helper to resolve model of various types
|
873
|
-
def resolve_model(m: str | Model
|
890
|
+
def resolve_model(m: str | Model) -> Model:
|
874
891
|
return get_model(
|
875
892
|
model=m,
|
876
893
|
base_url=model_base_url,
|
@@ -878,11 +895,8 @@ def resolve_models(
|
|
878
895
|
**model_args,
|
879
896
|
)
|
880
897
|
|
881
|
-
#
|
882
|
-
if
|
883
|
-
model = model or os.getenv("INSPECT_EVAL_MODEL", None)
|
884
|
-
if model is None:
|
885
|
-
raise ValueError("No model specified (and no INSPECT_EVAL_MODEL defined)")
|
898
|
+
# str to list
|
899
|
+
if isinstance(model, str):
|
886
900
|
model = [m.strip() for m in model.split(",")]
|
887
901
|
|
888
902
|
# resolve models
|
@@ -1236,7 +1250,7 @@ def active_model() -> Model | None:
|
|
1236
1250
|
|
1237
1251
|
|
1238
1252
|
# shared contexts for asyncio tasks
|
1239
|
-
active_model_context_var: ContextVar[Model] = ContextVar("active_model")
|
1253
|
+
active_model_context_var: ContextVar[Model | None] = ContextVar("active_model")
|
1240
1254
|
|
1241
1255
|
|
1242
1256
|
def handle_sample_message_limit(input: str | list[ChatMessage]) -> None:
|
@@ -240,7 +240,9 @@ class AnthropicAPI(ModelAPI):
|
|
240
240
|
response = message.model_dump()
|
241
241
|
|
242
242
|
# extract output
|
243
|
-
output = model_output_from_message(
|
243
|
+
output = await model_output_from_message(
|
244
|
+
self.client, self.model_name, message, tools
|
245
|
+
)
|
244
246
|
|
245
247
|
# return output and call
|
246
248
|
return output, model_call()
|
@@ -724,9 +726,15 @@ async def message_param(message: ChatMessage) -> MessageParam:
|
|
724
726
|
)
|
725
727
|
|
726
728
|
|
727
|
-
def model_output_from_message(
|
729
|
+
async def model_output_from_message(
|
730
|
+
client: AsyncAnthropic | AsyncAnthropicBedrock | AsyncAnthropicVertex,
|
731
|
+
model: str,
|
732
|
+
message: Message,
|
733
|
+
tools: list[ToolInfo],
|
734
|
+
) -> ModelOutput:
|
728
735
|
# extract content and tool calls
|
729
736
|
content: list[Content] = []
|
737
|
+
reasoning_tokens = 0
|
730
738
|
tool_calls: list[ToolCall] | None = None
|
731
739
|
|
732
740
|
for content_block in message.content:
|
@@ -754,6 +762,9 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelO
|
|
754
762
|
ContentReasoning(reasoning=content_block.data, redacted=True)
|
755
763
|
)
|
756
764
|
elif isinstance(content_block, ThinkingBlock):
|
765
|
+
reasoning_tokens += await count_tokens(
|
766
|
+
client, model, content_block.thinking
|
767
|
+
)
|
757
768
|
content.append(
|
758
769
|
ContentReasoning(
|
759
770
|
reasoning=content_block.thinking, signature=content_block.signature
|
@@ -787,6 +798,7 @@ def model_output_from_message(message: Message, tools: list[ToolInfo]) -> ModelO
|
|
787
798
|
total_tokens=total_tokens,
|
788
799
|
input_tokens_cache_write=input_tokens_cache_write,
|
789
800
|
input_tokens_cache_read=input_tokens_cache_read,
|
801
|
+
reasoning_tokens=reasoning_tokens if reasoning_tokens > 0 else None,
|
790
802
|
),
|
791
803
|
)
|
792
804
|
|
@@ -852,6 +864,26 @@ async def message_param_content(
|
|
852
864
|
)
|
853
865
|
|
854
866
|
|
867
|
+
async def count_tokens(
|
868
|
+
client: AsyncAnthropic | AsyncAnthropicBedrock | AsyncAnthropicVertex,
|
869
|
+
model: str,
|
870
|
+
text: str,
|
871
|
+
) -> int:
|
872
|
+
try:
|
873
|
+
response = await client.messages.count_tokens(
|
874
|
+
model=model,
|
875
|
+
messages=[{"role": "user", "content": text}],
|
876
|
+
)
|
877
|
+
return response.input_tokens
|
878
|
+
except Exception as e:
|
879
|
+
logger.warning(
|
880
|
+
f"Error counting tokens (falling back to estimated tokens): {str(e)}"
|
881
|
+
)
|
882
|
+
words = text.split()
|
883
|
+
estimated_tokens = int(len(words) * 1.3)
|
884
|
+
return estimated_tokens
|
885
|
+
|
886
|
+
|
855
887
|
def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
|
856
888
|
# remove base64 encoded images
|
857
889
|
if (
|
@@ -267,8 +267,12 @@ class GoogleGenAIAPI(ModelAPI):
|
|
267
267
|
import requests # type: ignore
|
268
268
|
|
269
269
|
# standard http errors
|
270
|
-
if
|
271
|
-
|
270
|
+
if (
|
271
|
+
isinstance(ex, APIError)
|
272
|
+
and isinstance(ex.status, str)
|
273
|
+
and ex.status.isdigit()
|
274
|
+
):
|
275
|
+
return is_retryable_http_status(int(ex.status))
|
272
276
|
|
273
277
|
# low-level requests exceptions
|
274
278
|
elif isinstance(ex, requests.exceptions.RequestException):
|
@@ -0,0 +1,31 @@
|
|
1
|
+
from inspect_ai._util.error import PrerequisiteError
|
2
|
+
from inspect_ai.tool import ToolChoice, ToolInfo
|
3
|
+
|
4
|
+
from .._chat_message import ChatMessage
|
5
|
+
from .._generate_config import GenerateConfig
|
6
|
+
from .._model import ModelAPI
|
7
|
+
from .._model_output import ModelOutput
|
8
|
+
|
9
|
+
|
10
|
+
class NoModel(ModelAPI):
|
11
|
+
"""A sentinel model type indicating there is no model specified."""
|
12
|
+
|
13
|
+
def __init__(
|
14
|
+
self,
|
15
|
+
model_name: str = "none",
|
16
|
+
base_url: str | None = None,
|
17
|
+
api_key: str | None = None,
|
18
|
+
config: GenerateConfig = GenerateConfig(),
|
19
|
+
) -> None:
|
20
|
+
super().__init__(model_name, base_url, api_key, [], config)
|
21
|
+
|
22
|
+
async def generate(
|
23
|
+
self,
|
24
|
+
input: list[ChatMessage],
|
25
|
+
tools: list[ToolInfo],
|
26
|
+
tool_choice: ToolChoice,
|
27
|
+
config: GenerateConfig,
|
28
|
+
) -> ModelOutput:
|
29
|
+
raise PrerequisiteError(
|
30
|
+
"No model specified (and no INSPECT_EVAL_MODEL defined)"
|
31
|
+
)
|
@@ -250,6 +250,13 @@ def mockllm() -> type[ModelAPI]:
|
|
250
250
|
return MockLLM
|
251
251
|
|
252
252
|
|
253
|
+
@modelapi(name="none")
|
254
|
+
def none() -> type[ModelAPI]:
|
255
|
+
from .none import NoModel
|
256
|
+
|
257
|
+
return NoModel
|
258
|
+
|
259
|
+
|
253
260
|
@modelapi("goodfire")
|
254
261
|
def goodfire() -> type[ModelAPI]:
|
255
262
|
"""Get the Goodfire API provider."""
|
@@ -17,7 +17,7 @@ from .._task_state import TaskState
|
|
17
17
|
def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Solver:
|
18
18
|
"""Bridge an external agent into an Inspect Solver.
|
19
19
|
|
20
|
-
See documentation at <https://inspect.
|
20
|
+
See documentation at <https://inspect.aisi.org.uk/agent-bridge.html>
|
21
21
|
|
22
22
|
Args:
|
23
23
|
agent: Callable which takes a sample `dict` and returns a result `dict`.
|
inspect_ai/solver/_chain.py
CHANGED
@@ -2,10 +2,11 @@ from typing import Sequence, overload
|
|
2
2
|
|
3
3
|
from typing_extensions import override
|
4
4
|
|
5
|
-
from ._solver import Generate, Solver
|
5
|
+
from ._solver import Generate, Solver, solver
|
6
6
|
from ._task_state import TaskState
|
7
7
|
|
8
8
|
|
9
|
+
@solver
|
9
10
|
def chain(*solvers: Solver | list[Solver]) -> Solver:
|
10
11
|
"""Compose a solver from multiple other solvers.
|
11
12
|
|
@@ -22,8 +23,8 @@ def chain(*solvers: Solver | list[Solver]) -> Solver:
|
|
22
23
|
"""
|
23
24
|
# flatten lists and chains
|
24
25
|
all_solvers: list[Solver] = []
|
25
|
-
for
|
26
|
-
all_solvers.extend(unroll(
|
26
|
+
for s in solvers:
|
27
|
+
all_solvers.extend(unroll(s))
|
27
28
|
|
28
29
|
return Chain(all_solvers)
|
29
30
|
|
@@ -72,9 +73,9 @@ class Chain(Sequence[Solver], Solver):
|
|
72
73
|
) -> TaskState:
|
73
74
|
from ._transcript import solver_transcript
|
74
75
|
|
75
|
-
for
|
76
|
-
with solver_transcript(
|
77
|
-
state = await
|
76
|
+
for slv in self._solvers:
|
77
|
+
with solver_transcript(slv, state) as st:
|
78
|
+
state = await slv(state, generate)
|
78
79
|
st.complete(state)
|
79
80
|
if state.completed:
|
80
81
|
break
|
@@ -15,7 +15,7 @@ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
|
|
15
15
|
def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
|
16
16
|
"""Desktop computer tool.
|
17
17
|
|
18
|
-
See documentation at <https://inspect.
|
18
|
+
See documentation at <https://inspect.aisi.org.uk/tools.html#sec-computer>.
|
19
19
|
|
20
20
|
Args:
|
21
21
|
max_screenshots: The maximum number of screenshots to play
|
@@ -17,7 +17,7 @@ from inspect_ai.util._store_model import StoreModel, store_as
|
|
17
17
|
def web_browser(interactive: bool = True) -> list[Tool]:
|
18
18
|
"""Tools used for web browser navigation.
|
19
19
|
|
20
|
-
See documentation at <https://inspect.
|
20
|
+
See documentation at <https://inspect.aisi.org.uk/tools.html#sec-web-browser>.
|
21
21
|
|
22
22
|
Args:
|
23
23
|
interactive: Provide interactive tools (enable
|
@@ -52,7 +52,7 @@ def web_search(
|
|
52
52
|
A web search is conducted using the specified provider, the results are parsed for relevance
|
53
53
|
using the specified model, and the top 'num_results' relevant pages are returned.
|
54
54
|
|
55
|
-
See further documentation at <https://inspect.
|
55
|
+
See further documentation at <https://inspect.aisi.org.uk/tools.html#sec-web-search>.
|
56
56
|
|
57
57
|
Args:
|
58
58
|
provider: Search provider (defaults to "google", currently
|
@@ -190,7 +190,7 @@ def google_search_provider(client: httpx.AsyncClient) -> SearchProvider:
|
|
190
190
|
google_cse_id = os.environ.get("GOOGLE_CSE_ID", None)
|
191
191
|
if not google_api_key or not google_cse_id:
|
192
192
|
raise PrerequisiteError(
|
193
|
-
"GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.
|
193
|
+
"GOOGLE_CSE_ID and/or GOOGLE_CSE_API_KEY not set in the environment. Please ensure these variables are defined to use Google Custom Search with the web_search tool.\n\nLearn more about the Google web search provider at https://inspect.aisi.org.uk/tools.html#google-provider"
|
194
194
|
)
|
195
195
|
|
196
196
|
async def search(query: str, start_idx: int) -> list[SearchLink]:
|
@@ -192,7 +192,8 @@ async def copy_sandbox_environment_files(
|
|
192
192
|
target_env = environments.get(envname, None)
|
193
193
|
if not target_env:
|
194
194
|
raise RuntimeError(
|
195
|
-
f"Environment referenced in sample file not found: '{envname}:{file}'"
|
195
|
+
f"Environment referenced in sample file not found: '{envname}:{file}'. "
|
196
|
+
+ "Note that ':' can be optionally used to specify an explicit environment name for sample files (e.g. 'envname:file') so cannot be used as a character within filenames."
|
196
197
|
)
|
197
198
|
else:
|
198
199
|
target_env = default_environment
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import abc
|
4
|
+
import logging
|
4
5
|
from dataclasses import dataclass, field
|
5
6
|
from typing import (
|
6
7
|
Annotated,
|
@@ -17,8 +18,12 @@ from typing import (
|
|
17
18
|
|
18
19
|
from pydantic import BaseModel, Field, model_validator
|
19
20
|
|
21
|
+
from inspect_ai._util.logger import warn_once
|
22
|
+
|
20
23
|
from .._subprocess import ExecResult
|
21
24
|
|
25
|
+
logger = logging.getLogger(__name__)
|
26
|
+
|
22
27
|
ST = TypeVar("ST", bound="SandboxEnvironment")
|
23
28
|
|
24
29
|
TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
|
@@ -381,11 +386,21 @@ def resolve_sandbox_environment(
|
|
381
386
|
return None
|
382
387
|
|
383
388
|
|
384
|
-
def deserialize_sandbox_specific_config(
|
389
|
+
def deserialize_sandbox_specific_config(
|
390
|
+
type: str, config: dict[str, Any]
|
391
|
+
) -> BaseModel | dict[str, Any]:
|
385
392
|
# Avoid circular import
|
386
393
|
from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
|
387
394
|
|
388
|
-
|
395
|
+
try:
|
396
|
+
sandboxenv_type = registry_find_sandboxenv(type)
|
397
|
+
except ValueError:
|
398
|
+
warn_once(
|
399
|
+
logger,
|
400
|
+
f"Could not find sandbox environment plugin for type '{type}'. "
|
401
|
+
"Ensure the plugin is installed in your environment.",
|
402
|
+
)
|
403
|
+
return config
|
389
404
|
config_deserialize = cast(
|
390
405
|
ConfigDeserialize, getattr(sandboxenv_type, "config_deserialize")
|
391
406
|
)
|
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.75
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
7
|
-
Project-URL: Documentation, https://inspect.
|
7
|
+
Project-URL: Documentation, https://inspect.aisi.org.uk/
|
8
8
|
Project-URL: Source Code, https://github.com/UKGovernmentBEIS/inspect_ai
|
9
9
|
Project-URL: Issue Tracker, https://github.com/UKGovernmentBEIS/inspect_ai/issues
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
@@ -97,13 +97,13 @@ Provides-Extra: dist
|
|
97
97
|
Requires-Dist: twine; extra == "dist"
|
98
98
|
Requires-Dist: build; extra == "dist"
|
99
99
|
|
100
|
-
[<img width="295" src="https://inspect.
|
100
|
+
[<img width="295" src="https://inspect.aisi.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
|
101
101
|
|
102
102
|
Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/).
|
103
103
|
|
104
104
|
Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages.
|
105
105
|
|
106
|
-
To get started with Inspect, please see the documentation at <https://inspect.
|
106
|
+
To get started with Inspect, please see the documentation at <https://inspect.aisi.org.uk/>.
|
107
107
|
|
108
108
|
***
|
109
109
|
|