inspect-ai 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/context.py +5 -0
- inspect_ai/_eval/eval.py +113 -1
- inspect_ai/_eval/run.py +10 -9
- inspect_ai/_util/eval_task_group.py +15 -0
- inspect_ai/agent/_react.py +38 -15
- inspect_ai/model/_providers/_openai_web_search.py +1 -1
- inspect_ai/model/_providers/groq.py +5 -0
- inspect_ai/model/_providers/perplexity.py +27 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/tool/_tools/_web_search/_web_search.py +8 -3
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/RECORD +16 -15
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/context.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
from anyio.abc import TaskGroup
|
2
|
+
|
1
3
|
from inspect_ai._util.dotenv import init_dotenv
|
4
|
+
from inspect_ai._util.eval_task_group import init_eval_task_group
|
2
5
|
from inspect_ai._util.hooks import init_hooks
|
3
6
|
from inspect_ai._util.logger import init_logger
|
4
7
|
from inspect_ai.approval._apply import have_tool_approval, init_tool_approval
|
@@ -19,6 +22,7 @@ def init_eval_context(
|
|
19
22
|
log_level: str | None,
|
20
23
|
log_level_transcript: str | None,
|
21
24
|
max_subprocesses: int | None = None,
|
25
|
+
task_group: TaskGroup | None = None,
|
22
26
|
) -> None:
|
23
27
|
init_dotenv()
|
24
28
|
init_logger(log_level, log_level_transcript)
|
@@ -27,6 +31,7 @@ def init_eval_context(
|
|
27
31
|
init_hooks()
|
28
32
|
init_active_samples()
|
29
33
|
init_human_approval_manager()
|
34
|
+
init_eval_task_group(task_group)
|
30
35
|
|
31
36
|
|
32
37
|
def init_task_context(
|
inspect_ai/_eval/eval.py
CHANGED
@@ -4,11 +4,15 @@ import sys
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Any, Literal, cast
|
6
6
|
|
7
|
+
import anyio
|
8
|
+
from anyio.abc import TaskGroup
|
9
|
+
|
7
10
|
from inspect_ai._eval.task.task import resolve_model_roles
|
8
11
|
from inspect_ai._util.notgiven import NOT_GIVEN, NotGiven
|
9
12
|
from inspect_ai.agent._agent import Agent, is_agent
|
10
13
|
from inspect_ai.agent._as_solver import as_solver
|
11
14
|
from inspect_ai.log._model import model_roles_config_to_model_roles
|
15
|
+
from inspect_ai.util._anyio import inner_exception
|
12
16
|
|
13
17
|
if sys.version_info < (3, 11):
|
14
18
|
from exceptiongroup import ExceptionGroup
|
@@ -359,6 +363,112 @@ async def eval_async(
|
|
359
363
|
Returns:
|
360
364
|
List of EvalLog (one for each task)
|
361
365
|
"""
|
366
|
+
result: list[EvalLog] | None = None
|
367
|
+
|
368
|
+
async def run(tg: TaskGroup) -> None:
|
369
|
+
try:
|
370
|
+
nonlocal result
|
371
|
+
result = await _eval_async_inner(
|
372
|
+
tg=tg,
|
373
|
+
tasks=tasks,
|
374
|
+
model=model,
|
375
|
+
model_base_url=model_base_url,
|
376
|
+
model_args=model_args,
|
377
|
+
model_roles=model_roles,
|
378
|
+
task_args=task_args,
|
379
|
+
sandbox=sandbox,
|
380
|
+
sandbox_cleanup=sandbox_cleanup,
|
381
|
+
solver=solver,
|
382
|
+
tags=tags,
|
383
|
+
metadata=metadata,
|
384
|
+
approval=approval,
|
385
|
+
log_level=log_level,
|
386
|
+
log_level_transcript=log_level_transcript,
|
387
|
+
log_dir=log_dir,
|
388
|
+
log_format=log_format,
|
389
|
+
limit=limit,
|
390
|
+
sample_id=sample_id,
|
391
|
+
epochs=epochs,
|
392
|
+
fail_on_error=fail_on_error,
|
393
|
+
retry_on_error=retry_on_error,
|
394
|
+
debug_errors=debug_errors,
|
395
|
+
message_limit=message_limit,
|
396
|
+
token_limit=token_limit,
|
397
|
+
time_limit=time_limit,
|
398
|
+
working_limit=working_limit,
|
399
|
+
max_samples=max_samples,
|
400
|
+
max_tasks=max_tasks,
|
401
|
+
max_subprocesses=max_subprocesses,
|
402
|
+
max_sandboxes=max_sandboxes,
|
403
|
+
log_samples=log_samples,
|
404
|
+
log_realtime=log_realtime,
|
405
|
+
log_images=log_images,
|
406
|
+
log_buffer=log_buffer,
|
407
|
+
log_shared=log_shared,
|
408
|
+
log_header_only=log_header_only,
|
409
|
+
score=score,
|
410
|
+
score_display=score_display,
|
411
|
+
**kwargs,
|
412
|
+
)
|
413
|
+
finally:
|
414
|
+
tg.cancel_scope.cancel()
|
415
|
+
|
416
|
+
try:
|
417
|
+
async with anyio.create_task_group() as tg:
|
418
|
+
tg.start_soon(run, tg)
|
419
|
+
except Exception as ex:
|
420
|
+
raise inner_exception(ex)
|
421
|
+
except anyio.get_cancelled_exc_class():
|
422
|
+
# Cancelled exceptions are expected and handled by _eval_async_inner
|
423
|
+
pass
|
424
|
+
|
425
|
+
assert result is not None, "Eval async did not return a result."
|
426
|
+
|
427
|
+
return result
|
428
|
+
|
429
|
+
|
430
|
+
async def _eval_async_inner(
|
431
|
+
tg: TaskGroup,
|
432
|
+
tasks: Tasks,
|
433
|
+
model: str | Model | list[str] | list[Model] | None | NotGiven = NOT_GIVEN,
|
434
|
+
model_base_url: str | None = None,
|
435
|
+
model_args: dict[str, Any] | str = dict(),
|
436
|
+
model_roles: dict[str, str | Model] | None = None,
|
437
|
+
task_args: dict[str, Any] | str = dict(),
|
438
|
+
sandbox: SandboxEnvironmentType | None = None,
|
439
|
+
sandbox_cleanup: bool | None = None,
|
440
|
+
solver: Solver | SolverSpec | Agent | list[Solver] | None = None,
|
441
|
+
tags: list[str] | None = None,
|
442
|
+
metadata: dict[str, Any] | None = None,
|
443
|
+
approval: str | list[ApprovalPolicy] | ApprovalPolicyConfig | None = None,
|
444
|
+
log_level: str | None = None,
|
445
|
+
log_level_transcript: str | None = None,
|
446
|
+
log_dir: str | None = None,
|
447
|
+
log_format: Literal["eval", "json"] | None = None,
|
448
|
+
limit: int | tuple[int, int] | None = None,
|
449
|
+
sample_id: str | int | list[str] | list[int] | list[str | int] | None = None,
|
450
|
+
epochs: int | Epochs | None = None,
|
451
|
+
fail_on_error: bool | float | None = None,
|
452
|
+
retry_on_error: int | None = None,
|
453
|
+
debug_errors: bool | None = None,
|
454
|
+
message_limit: int | None = None,
|
455
|
+
token_limit: int | None = None,
|
456
|
+
time_limit: int | None = None,
|
457
|
+
working_limit: int | None = None,
|
458
|
+
max_samples: int | None = None,
|
459
|
+
max_tasks: int | None = None,
|
460
|
+
max_subprocesses: int | None = None,
|
461
|
+
max_sandboxes: int | None = None,
|
462
|
+
log_samples: bool | None = None,
|
463
|
+
log_realtime: bool | None = None,
|
464
|
+
log_images: bool | None = None,
|
465
|
+
log_buffer: int | None = None,
|
466
|
+
log_shared: bool | int | None = None,
|
467
|
+
log_header_only: bool | None = None,
|
468
|
+
score: bool = True,
|
469
|
+
score_display: bool | None = None,
|
470
|
+
**kwargs: Unpack[GenerateConfigArgs],
|
471
|
+
) -> list[EvalLog]:
|
362
472
|
# only a single call to eval_async can be active at a time, this used
|
363
473
|
# to be due to running tasks switching to the task's directory, however
|
364
474
|
# that feature no longer exists so we may be able to revisit this
|
@@ -387,6 +497,7 @@ async def eval_async(
|
|
387
497
|
max_subprocesses=max_subprocesses,
|
388
498
|
log_level=log_level,
|
389
499
|
log_level_transcript=log_level_transcript,
|
500
|
+
task_group=tg,
|
390
501
|
**kwargs,
|
391
502
|
)
|
392
503
|
|
@@ -934,10 +1045,11 @@ def eval_init(
|
|
934
1045
|
max_subprocesses: int | None = None,
|
935
1046
|
log_level: str | None = None,
|
936
1047
|
log_level_transcript: str | None = None,
|
1048
|
+
task_group: TaskGroup | None = None,
|
937
1049
|
**kwargs: Unpack[GenerateConfigArgs],
|
938
1050
|
) -> list[Model]:
|
939
1051
|
# init eval context
|
940
|
-
init_eval_context(log_level, log_level_transcript, max_subprocesses)
|
1052
|
+
init_eval_context(log_level, log_level_transcript, max_subprocesses, task_group)
|
941
1053
|
|
942
1054
|
# resolve model and task args
|
943
1055
|
model_args = resolve_args(model_args)
|
inspect_ai/_eval/run.py
CHANGED
@@ -522,15 +522,16 @@ async def startup_sandbox_environments(
|
|
522
522
|
|
523
523
|
# return shutdown method
|
524
524
|
async def shutdown() -> None:
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
525
|
+
with anyio.CancelScope(shield=True):
|
526
|
+
for cleanup_jobs in cleanups:
|
527
|
+
try:
|
528
|
+
cleanup_fn, config, task_run_dir = cleanup_jobs
|
529
|
+
with chdir(task_run_dir):
|
530
|
+
await cleanup_fn("shutdown", config, cleanup)
|
531
|
+
except BaseException as ex:
|
532
|
+
log.warning(
|
533
|
+
f"Error occurred shutting down sandbox environments: {exception_message(ex)}"
|
534
|
+
)
|
534
535
|
|
535
536
|
return shutdown
|
536
537
|
|
inspect_ai/_util/eval_task_group.py
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
from anyio.abc import TaskGroup
|
2
|
+
|
3
|
+
_eval_task_group: TaskGroup | None = None
|
4
|
+
|
5
|
+
|
6
|
+
def init_eval_task_group(tg: TaskGroup | None) -> None:
|
7
|
+
global _eval_task_group
|
8
|
+
_eval_task_group = tg
|
9
|
+
|
10
|
+
|
11
|
+
def eval_task_group() -> TaskGroup:
|
12
|
+
global _eval_task_group
|
13
|
+
if _eval_task_group is None:
|
14
|
+
raise RuntimeError("Task group has not been initialized")
|
15
|
+
return _eval_task_group
|
inspect_ai/agent/_react.py
CHANGED
@@ -82,9 +82,8 @@ def react(
|
|
82
82
|
the submit tool within the message. Alternatively, an async function
|
83
83
|
to call to determine whether the loop should continue and what message
|
84
84
|
to play back. Note that this function is called on _every_ iteration of
|
85
|
-
the loop
|
86
|
-
|
87
|
-
calls were made.
|
85
|
+
the loop so if you only want to send a message back when the model fails
|
86
|
+
to call tools you need to code that behavior explicitly.
|
88
87
|
truncation: Truncate the conversation history in the event of a context
|
89
88
|
window overflow. Defaults to "disabled" which does no truncation. Pass
|
90
89
|
"auto" to use `trim_messages()` to reduce the context size. Pass a
|
@@ -246,13 +245,12 @@ def react(
|
|
246
245
|
)
|
247
246
|
)
|
248
247
|
elif isinstance(do_continue, str):
|
249
|
-
#
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
content=do_continue.format(submit=submit_tool.name)
|
254
|
-
)
|
248
|
+
# send back the user message
|
249
|
+
state.messages.append(
|
250
|
+
ChatMessageUser(
|
251
|
+
content=do_continue.format(submit=submit_tool.name)
|
255
252
|
)
|
253
|
+
)
|
256
254
|
else: # do_continue is False
|
257
255
|
break
|
258
256
|
|
@@ -328,11 +326,14 @@ def react_no_submit(
|
|
328
326
|
if on_continue:
|
329
327
|
do_continue = await _call_on_continue(on_continue, state)
|
330
328
|
if do_continue is True:
|
331
|
-
do_continue = DEFAULT_CONTINUE_PROMOT_NO_SUBMIT
|
332
|
-
if do_continue:
|
333
|
-
# send back user message if there are no tool calls
|
334
329
|
if not state.output.message.tool_calls:
|
335
|
-
state.messages.append(
|
330
|
+
state.messages.append(
|
331
|
+
ChatMessageUser(
|
332
|
+
content=DEFAULT_CONTINUE_PROMOT_NO_SUBMIT
|
333
|
+
)
|
334
|
+
)
|
335
|
+
elif isinstance(do_continue, str):
|
336
|
+
state.messages.append(ChatMessageUser(content=do_continue))
|
336
337
|
else:
|
337
338
|
break
|
338
339
|
elif not state.output.message.tool_calls:
|
@@ -471,12 +472,34 @@ def _remove_submit_tool(
|
|
471
472
|
|
472
473
|
# remove submit tool from assistant messages
|
473
474
|
if isinstance(message, ChatMessageAssistant) and message.tool_calls:
|
474
|
-
|
475
|
+
new_tools_calls = [
|
475
476
|
tool_call
|
476
477
|
for tool_call in message.tool_calls
|
477
478
|
if tool_call.function != submit_name
|
478
479
|
]
|
479
|
-
|
480
|
+
|
481
|
+
# If a submit tool call was removed, we need to update the message
|
482
|
+
if len(new_tools_calls) < len(message.tool_calls):
|
483
|
+
message = message.model_copy(
|
484
|
+
update=dict(
|
485
|
+
tool_calls=new_tools_calls,
|
486
|
+
# Some models (OpenAI) don't like to see the reasoning
|
487
|
+
# content item that led to the submit tool call, so we
|
488
|
+
# have to remove it too.
|
489
|
+
content=(
|
490
|
+
[
|
491
|
+
content
|
492
|
+
for content in message.content
|
493
|
+
if (
|
494
|
+
isinstance(content, str)
|
495
|
+
or content.type != "reasoning"
|
496
|
+
)
|
497
|
+
]
|
498
|
+
if isinstance(message.content, list)
|
499
|
+
else message.content
|
500
|
+
),
|
501
|
+
)
|
502
|
+
)
|
480
503
|
|
481
504
|
# always append message
|
482
505
|
filtered.append(message)
|
inspect_ai/model/_providers/_openai_web_search.py
CHANGED
@@ -14,7 +14,7 @@ def maybe_web_search_tool(model_name: str, tool: ToolInfo) -> WebSearchToolParam
|
|
14
14
|
tool.name == "web_search"
|
15
15
|
and tool.options
|
16
16
|
and "openai" in tool.options
|
17
|
-
and model_name in COMPATIBLE_MODELS
|
17
|
+
and any(model_name.startswith(model) for model in COMPATIBLE_MODELS)
|
18
18
|
)
|
19
19
|
else None
|
20
20
|
)
|
inspect_ai/model/_providers/groq.py
CHANGED
@@ -156,6 +156,11 @@ class GroqAPI(ModelAPI):
|
|
156
156
|
"completion_time": completion.usage.completion_time,
|
157
157
|
"total_time": completion.usage.total_time,
|
158
158
|
}
|
159
|
+
if completion.choices[0].message.executed_tools:
|
160
|
+
metadata["executed_tools"] = [
|
161
|
+
tool.model_dump()
|
162
|
+
for tool in completion.choices[0].message.executed_tools
|
163
|
+
]
|
159
164
|
|
160
165
|
# extract output
|
161
166
|
choices = self._chat_choices_from_response(completion, tools)
|
inspect_ai/model/_providers/perplexity.py
CHANGED
@@ -49,7 +49,33 @@ class PerplexityAPI(OpenAICompatibleAPI):
|
|
49
49
|
tool_choice: "ToolChoice",
|
50
50
|
config: GenerateConfig,
|
51
51
|
) -> tuple[ModelOutput | Exception, "ModelCall"]:
|
52
|
-
|
52
|
+
search_options: dict[str, Any] | None = None
|
53
|
+
for tool in tools:
|
54
|
+
if (
|
55
|
+
tool.name == "web_search"
|
56
|
+
and tool.options
|
57
|
+
and "perplexity" in tool.options
|
58
|
+
):
|
59
|
+
maybe_opts = tool.options["perplexity"]
|
60
|
+
if maybe_opts is not None:
|
61
|
+
if maybe_opts is True:
|
62
|
+
search_options = {}
|
63
|
+
elif isinstance(maybe_opts, dict):
|
64
|
+
search_options = maybe_opts
|
65
|
+
else:
|
66
|
+
raise TypeError(
|
67
|
+
f"Expected a dictionary or True for perplexity_options, got {type(maybe_opts)}"
|
68
|
+
)
|
69
|
+
else:
|
70
|
+
raise ValueError(
|
71
|
+
"Perplexity does not support tools other than web_search with perplexity options"
|
72
|
+
)
|
73
|
+
|
74
|
+
if search_options:
|
75
|
+
extra_body = {**(config.extra_body or {}), **search_options}
|
76
|
+
config = config.merge(GenerateConfig(extra_body=extra_body))
|
77
|
+
|
78
|
+
result = await super().generate(input, [], tool_choice, config)
|
53
79
|
output, call = cast(tuple[ModelOutput, "ModelCall"], result)
|
54
80
|
|
55
81
|
if self._response:
|
inspect_ai/tool/_tools/_web_search/_web_search.py
CHANGED
@@ -18,7 +18,7 @@ from ._tavily import TavilyOptions, tavily_search_provider
|
|
18
18
|
from ._web_search_provider import SearchProvider
|
19
19
|
|
20
20
|
Provider: TypeAlias = Literal[
|
21
|
-
"gemini", "openai", "anthropic", "tavily", "google", "exa"
|
21
|
+
"gemini", "openai", "anthropic", "perplexity", "tavily", "google", "exa"
|
22
22
|
]
|
23
23
|
valid_providers = set(get_args(Provider))
|
24
24
|
|
@@ -35,6 +35,7 @@ class Providers(TypedDict, total=False):
|
|
35
35
|
openai: dict[str, Any] | Literal[True]
|
36
36
|
anthropic: dict[str, Any] | Literal[True]
|
37
37
|
gemini: dict[str, Any] | Literal[True]
|
38
|
+
perplexity: dict[str, Any] | Literal[True]
|
38
39
|
tavily: dict[str, Any] | Literal[True]
|
39
40
|
google: dict[str, Any] | Literal[True]
|
40
41
|
exa: dict[str, Any] | Literal[True]
|
@@ -44,6 +45,7 @@ class _NormalizedProviders(TypedDict, total=False):
|
|
44
45
|
openai: dict[str, Any]
|
45
46
|
anthropic: dict[str, Any]
|
46
47
|
gemini: dict[str, Any]
|
48
|
+
perplexity: dict[str, Any]
|
47
49
|
tavily: dict[str, Any]
|
48
50
|
google: dict[str, Any]
|
49
51
|
exa: dict[str, Any]
|
@@ -67,7 +69,7 @@ def web_search(
|
|
67
69
|
Web searches are executed using a provider. Providers are split
|
68
70
|
into two categories:
|
69
71
|
|
70
|
-
- Internal providers: "openai", "anthropic" - these use the model's built-in
|
72
|
+
- Internal providers: "openai", "anthropic", "gemini", "perplexity" - these use the model's built-in
|
71
73
|
search capability and do not require separate API keys. These work only for
|
72
74
|
their respective model provider (e.g. the "openai" search provider
|
73
75
|
works only for `openai/*` models).
|
@@ -84,7 +86,7 @@ def web_search(
|
|
84
86
|
|
85
87
|
Args:
|
86
88
|
providers: Configuration for the search providers to use. Currently supported
|
87
|
-
providers are "openai", "anthropic", "tavily", "google", and "exa". The
|
89
|
+
providers are "openai", "anthropic", "perplexity", "tavily", "google", and "exa". The
|
88
90
|
`providers` parameter supports several formats based on either a `str`
|
89
91
|
specifying a provider or a `dict` whose keys are the provider names and
|
90
92
|
whose values are the provider-specific options. A single value or a list
|
@@ -121,6 +123,9 @@ def web_search(
|
|
121
123
|
- anthropic: Supports Anthropic's web search parameters.
|
122
124
|
See https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-search-tool#tool-definition
|
123
125
|
|
126
|
+
- perplexity: Supports Perplexity's web search parameters.
|
127
|
+
See https://docs.perplexity.ai/api-reference/chat-completions-post
|
128
|
+
|
124
129
|
- tavily: Supports options like `max_results`, `search_depth`, etc.
|
125
130
|
See https://docs.tavily.com/documentation/api-reference/endpoint/search
|
126
131
|
|
{inspect_ai-0.3.105.dist-info → inspect_ai-0.3.107.dist-info}/RECORD
CHANGED
@@ -46,13 +46,13 @@ inspect_ai/_display/textual/widgets/toggle.py,sha256=ToYs-S4n90yuxWcAW2OTg6AbRf0
|
|
46
46
|
inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3aoytEW_wdGTCN1Hea5uw,12558
|
47
47
|
inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
|
48
48
|
inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
|
-
inspect_ai/_eval/context.py,sha256=
|
50
|
-
inspect_ai/_eval/eval.py,sha256=
|
49
|
+
inspect_ai/_eval/context.py,sha256=vEOqPNG3eWdkspaLYEJJE59D_WxH_3ZPHc3tP0MjwHg,1584
|
50
|
+
inspect_ai/_eval/eval.py,sha256=DlDpqMWpF3JIgJ9Xeb0QJ9E99Dm1VW48viNEUwj6Lis,48279
|
51
51
|
inspect_ai/_eval/evalset.py,sha256=cGLAX6qnMMi-LTenLtu47wv0JJTjAvFLbE6zKfZbfTg,25112
|
52
52
|
inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
|
53
53
|
inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
|
54
54
|
inspect_ai/_eval/registry.py,sha256=IMyF_Ru11DrqjFe1yZJvghQcJSfssQ08wQqTt_F38Ag,5570
|
55
|
-
inspect_ai/_eval/run.py,sha256=
|
55
|
+
inspect_ai/_eval/run.py,sha256=Wlmc4pE5hUTyGRn2iYC1xLleyI2R8ffIoCPkITTVePc,21590
|
56
56
|
inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
|
57
57
|
inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
|
58
58
|
inspect_ai/_eval/task/constants.py,sha256=_YGvlhJL7kC7_3OCbyjDabnJNX2kopH_kRxwODF9cKw,117
|
@@ -86,6 +86,7 @@ inspect_ai/_util/dotenv.py,sha256=9KsPrGFYUVsBGDTnmDuvtptkiOuoxztVaIRdID58EuA,34
|
|
86
86
|
inspect_ai/_util/entrypoints.py,sha256=FnK32vIRvSFdH80l5H0o6oiJif837oeDrl5N10_P-xo,1260
|
87
87
|
inspect_ai/_util/environ.py,sha256=COdX6sqJIq3ikNQD2gR2nMT7yk0qW8x4EjlJwHS9A_M,1428
|
88
88
|
inspect_ai/_util/error.py,sha256=NIYCkOXGMUF1_zSHpeTBKzBb79_llQZvvf0TGYHO57Y,2333
|
89
|
+
inspect_ai/_util/eval_task_group.py,sha256=NBnvF-VuPT-foZ4_7ITjJ61LNQ29F0cuaTIh-y-FbVo,380
|
89
90
|
inspect_ai/_util/exception.py,sha256=coVT0bQy6sADWSvTUaVVDDKnb1XCzREfuVi9UOvx_S0,149
|
90
91
|
inspect_ai/_util/file.py,sha256=OqSe8RXF9OBWm8Rzvnsnu854ZqQWl-6aFlUrbjIVHoA,13432
|
91
92
|
inspect_ai/_util/format.py,sha256=4TQ1FE4-TDNlHcr0r6dfsjv84sV11C9ouTfi55W-yIs,3323
|
@@ -519,7 +520,7 @@ inspect_ai/agent/_as_solver.py,sha256=glOKzItIPsveWDGlk2igLfFDOix_NlEkAtyQ6YsWB2
|
|
519
520
|
inspect_ai/agent/_as_tool.py,sha256=-NGZUFAEimvSpog0UmNtYDMlbbuKaWnIgwNnMd_fffM,4912
|
520
521
|
inspect_ai/agent/_filter.py,sha256=qnT0HbT4edpDi0MwXY3Q3It2pzNRkTRXZDOqfCwMY6M,1234
|
521
522
|
inspect_ai/agent/_handoff.py,sha256=fonoLtC9CxCt7Ya_EbhHU-1indqVpOaTJ2b8-9BoM_k,3848
|
522
|
-
inspect_ai/agent/_react.py,sha256=
|
523
|
+
inspect_ai/agent/_react.py,sha256=oFUy2w96d9nSYeLBoeqLv1GuBjAFh0glPaPJ8M8jOPE,20003
|
523
524
|
inspect_ai/agent/_run.py,sha256=wXAE26-w0W1Brn5KTEQH5Esl_ZrwH37Po18yX-EZQsI,3171
|
524
525
|
inspect_ai/agent/_types.py,sha256=UeXBI_p8VVgEeMqenJjFRfyAWqWBKugUvVS2eJoIIUw,4560
|
525
526
|
inspect_ai/agent/_bridge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -638,14 +639,14 @@ inspect_ai/model/_trim.py,sha256=y2bSok4y844spv7vdjUVGnxWG93WcslAsXyplWDoRqU,337
|
|
638
639
|
inspect_ai/model/_providers/_anthropic_citations.py,sha256=8Wub6F8vvup0-e-BKV3wcrT4n1_7q9Rimv0xnMQAEZM,5767
|
639
640
|
inspect_ai/model/_providers/_google_citations.py,sha256=KtqQdvUrWV1lGYfD7aLOIwUbIqZbVJDCoX-uCMHBZz8,3573
|
640
641
|
inspect_ai/model/_providers/_openai_computer_use.py,sha256=vbKkYLhqNuX16zuWfg5MaGp9H8URrPcLhKQ1pDsZtPo,5943
|
641
|
-
inspect_ai/model/_providers/_openai_web_search.py,sha256=
|
642
|
+
inspect_ai/model/_providers/_openai_web_search.py,sha256=8hTUNmt7nYCWMn1RjxevWdeZL7FyLi6T2AWKOciNz3w,1228
|
642
643
|
inspect_ai/model/_providers/anthropic.py,sha256=Dzu_5nWH4Xx1XklARkjCP0jkIvt8Nf5SDmzgblPGsf4,42201
|
643
644
|
inspect_ai/model/_providers/azureai.py,sha256=KzgPYtUMsqEZZTHWYWYLnnRd6wh4qqlqqZL2JwvraLs,16738
|
644
645
|
inspect_ai/model/_providers/bedrock.py,sha256=G252v6gUXtT56M4JaLLY7tEw2AJVQFucjeFgv0okhgo,23999
|
645
646
|
inspect_ai/model/_providers/cloudflare.py,sha256=9yHfA5qbKWjzOfOzCJ_u8CZsH_U7AolAWLxvLBXKrhM,2375
|
646
647
|
inspect_ai/model/_providers/google.py,sha256=6omSismcBEFH_2lVhOhLF5A3ceMpuZiARxWD91n2R6E,35748
|
647
648
|
inspect_ai/model/_providers/grok.py,sha256=iAPXmZMR7VWPq6EIwRsoUJr_TR6b5kTt-Fkba1pogGQ,1267
|
648
|
-
inspect_ai/model/_providers/groq.py,sha256=
|
649
|
+
inspect_ai/model/_providers/groq.py,sha256=uPzH8gmJhBOwgXWAdiqtCYlBi79E051wr6-bxUrMUwA,12503
|
649
650
|
inspect_ai/model/_providers/hf.py,sha256=jyXi4qyq2hdsp1waB2ON5m8f9mpE2h1GFD7Tu_phCEo,19115
|
650
651
|
inspect_ai/model/_providers/llama_cpp_python.py,sha256=qVGpR7qnuP3wbYfFqSTkSc63sYsNnK1XC5IV-Ac0Uu4,618
|
651
652
|
inspect_ai/model/_providers/mistral.py,sha256=TNVrwS1gJ-ClxPvDnhGQBelLtEm6r4eF4t25H0pJwQw,18152
|
@@ -657,8 +658,8 @@ inspect_ai/model/_providers/openai_compatible.py,sha256=2dGx2pJSj6o0uJWKE3mimX47
|
|
657
658
|
inspect_ai/model/_providers/openai_o1.py,sha256=ahdXt2TFtPTdDvSGVQw7EaVindfbFbY2pLZrrB45rFg,13305
|
658
659
|
inspect_ai/model/_providers/openai_responses.py,sha256=eNDDCyIfBPCy_gTbpRDr5UicGR_8xkIq9TBTZT7wU7w,6685
|
659
660
|
inspect_ai/model/_providers/openrouter.py,sha256=sm-XlzcevoZfoR4C00jCxlfeL2NlnPVpJJA1mFFgkgw,4990
|
660
|
-
inspect_ai/model/_providers/perplexity.py,sha256=
|
661
|
-
inspect_ai/model/_providers/providers.py,sha256=
|
661
|
+
inspect_ai/model/_providers/perplexity.py,sha256=LUvy4kk_v4--zA08yqgQtPj4COecXP7Jym8ThtUxOGw,5954
|
662
|
+
inspect_ai/model/_providers/providers.py,sha256=w8QpVV0Hj_pQjOEC3W-bisJraSwe3QbovH8-sYBz2Cc,6806
|
662
663
|
inspect_ai/model/_providers/sglang.py,sha256=vmIIFC-wyltCAvewvgMVRs4jfp9wFSfinTuNo9TQxM8,8750
|
663
664
|
inspect_ai/model/_providers/together.py,sha256=EUNag5nraqo3GvzwKB1jukhZj-GACxsCGPrBC4VR2MU,9786
|
664
665
|
inspect_ai/model/_providers/vertex.py,sha256=_8wsThFHIpuwJ5Bvmx8PsEYKqauUYZ1v8B3dn41CtFw,17328
|
@@ -745,7 +746,7 @@ inspect_ai/tool/_tools/_web_search/_base_http_provider.py,sha256=ww9SbvrXa5MNxwR
|
|
745
746
|
inspect_ai/tool/_tools/_web_search/_exa.py,sha256=vH5aLLsWRYpTaSkk1jN6os3x-eE8uAqT_U5ToJaIoP8,2372
|
746
747
|
inspect_ai/tool/_tools/_web_search/_google.py,sha256=CD1ckTpw4prwkancWz6_aTWwGZ4xwKfbDcqAPiSLNS4,7239
|
747
748
|
inspect_ai/tool/_tools/_web_search/_tavily.py,sha256=4i9lqhWTgT_cD-cocDQjW-eJhRZZ6pjzlFfKYe1rfdg,3053
|
748
|
-
inspect_ai/tool/_tools/_web_search/_web_search.py,sha256=
|
749
|
+
inspect_ai/tool/_tools/_web_search/_web_search.py,sha256=qB6O-NAdt4FYFshf02r71nIOYETuSOdMpAUgEeHw4BE,11308
|
749
750
|
inspect_ai/tool/_tools/_web_search/_web_search_provider.py,sha256=SvbQd7l2wqz5cAdNk9zrXX8NqOoWd-FEtF-6zyLA7MA,208
|
750
751
|
inspect_ai/util/__init__.py,sha256=pYCxgPSDHdN9S2UtYhL8l_oUkiVFInQ_-waSFrD09x8,2281
|
751
752
|
inspect_ai/util/_anyio.py,sha256=ImV_Q9oJ0XT0Fy6qa68OHpCzcUbfxptbHAjYWre-m2U,1541
|
@@ -783,9 +784,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
|
|
783
784
|
inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
|
784
785
|
inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
|
785
786
|
inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
|
786
|
-
inspect_ai-0.3.
|
787
|
-
inspect_ai-0.3.
|
788
|
-
inspect_ai-0.3.
|
789
|
-
inspect_ai-0.3.
|
790
|
-
inspect_ai-0.3.
|
791
|
-
inspect_ai-0.3.
|
787
|
+
inspect_ai-0.3.107.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
788
|
+
inspect_ai-0.3.107.dist-info/METADATA,sha256=UDm4EwaGEHfBNNwE6qudz_TpT9awiq0wByKkjQn_sFE,5467
|
789
|
+
inspect_ai-0.3.107.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
790
|
+
inspect_ai-0.3.107.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
791
|
+
inspect_ai-0.3.107.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
792
|
+
inspect_ai-0.3.107.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|