PyPI - inspect-ai - Versions diffs - 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl - Mend

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

inspect_ai/_cli/eval.py +0 -7
inspect_ai/_display/textual/widgets/samples.py +1 -1
inspect_ai/_eval/eval.py +10 -1
inspect_ai/_eval/loader.py +79 -19
inspect_ai/_eval/registry.py +6 -0
inspect_ai/_eval/score.py +2 -1
inspect_ai/_eval/task/results.py +6 -5
inspect_ai/_eval/task/run.py +11 -11
inspect_ai/_view/www/dist/assets/index.js +262 -303
inspect_ai/_view/www/src/App.mjs +6 -6
inspect_ai/_view/www/src/Types.mjs +1 -1
inspect_ai/_view/www/src/api/Types.ts +133 -0
inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
inspect_ai/_view/www/src/api/api-http.ts +219 -0
inspect_ai/_view/www/src/api/api-shared.ts +47 -0
inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
inspect_ai/_view/www/src/api/index.ts +51 -0
inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
inspect_ai/_view/www/src/index.js +2 -2
inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
inspect_ai/_view/www/src/navbar/Navbar.mjs +1 -1
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleList.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +14 -14
inspect_ai/_view/www/src/samples/SamplesTab.mjs +10 -10
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +1 -3
inspect_ai/_view/www/src/utils/vscode.ts +36 -0
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/approval/_human/manager.py +1 -1
inspect_ai/model/_call_tools.py +55 -0
inspect_ai/model/_conversation.py +1 -4
inspect_ai/model/_generate_config.py +2 -8
inspect_ai/model/_model_output.py +15 -0
inspect_ai/model/_openai.py +383 -0
inspect_ai/model/_providers/anthropic.py +52 -11
inspect_ai/model/_providers/azureai.py +1 -1
inspect_ai/model/_providers/goodfire.py +248 -0
inspect_ai/model/_providers/groq.py +7 -3
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +2 -1
inspect_ai/model/_providers/openai.py +36 -202
inspect_ai/model/_providers/openai_o1.py +2 -4
inspect_ai/model/_providers/providers.py +22 -0
inspect_ai/model/_providers/together.py +4 -4
inspect_ai/model/_providers/util/__init__.py +2 -3
inspect_ai/model/_providers/util/hf_handler.py +1 -1
inspect_ai/model/_providers/util/llama31.py +1 -1
inspect_ai/model/_providers/util/util.py +0 -76
inspect_ai/scorer/_metric.py +3 -0
inspect_ai/scorer/_scorer.py +2 -1
inspect_ai/solver/__init__.py +2 -0
inspect_ai/solver/_basic_agent.py +1 -1
inspect_ai/solver/_bridge/__init__.py +3 -0
inspect_ai/solver/_bridge/bridge.py +100 -0
inspect_ai/solver/_bridge/patch.py +170 -0
inspect_ai/solver/_solver.py +6 -0
inspect_ai/util/_display.py +5 -0
inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +68 -63
inspect_ai/_view/www/src/api/Types.mjs +0 -117
inspect_ai/_view/www/src/api/api-http.mjs +0 -300
inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
inspect_ai/_view/www/src/api/index.mjs +0 -49
inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0

inspect_ai/_cli/eval.py CHANGED Viewed

@@ -314,12 +314,6 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
         envvar="INSPECT_EVAL_STOP_SEQS",
     )
-    @click.option(
-        "--suffix",
-        type=str,
-        help="The suffix that comes after a completion of inserted text. OpenAI only.",
-        envvar="INSPECT_EVAL_SUFFIX",
-    )
     @click.option(
         "--temperature",
         type=float,
@@ -439,7 +433,6 @@ def eval_command(
     logit_bias: str | None,
     seed: int | None,
     stop_seqs: str | None,
-    suffix: str | None,
     temperature: float | None,
     top_p: float | None,
     top_k: int | None,

inspect_ai/_display/textual/widgets/samples.py CHANGED Viewed

@@ -413,7 +413,7 @@ class SampleToolbar(Horizontal):
         grid-columns: auto auto 1fr auto auto;
     }}
     SampleToolbar #{STATUS_GROUP} {{
-        min-width: 20;
+        width: 22;
     }}
     SampleToolbar Button {{
         margin-bottom: 1;

inspect_ai/_eval/eval.py CHANGED Viewed

@@ -35,7 +35,12 @@ from inspect_ai.scorer._reducer import reducer_log_names
 from inspect_ai.solver._chain import chain
 from inspect_ai.solver._solver import Solver, SolverSpec
 from inspect_ai.util import SandboxEnvironmentType
-from inspect_ai.util._display import DisplayType, display_type, init_display_type
+from inspect_ai.util._display import (
+    DisplayType,
+    display_type,
+    display_type_initialized,
+    init_display_type,
+)
 from .context import init_eval_context
 from .loader import ResolvedTask, resolve_tasks
@@ -306,6 +311,10 @@ async def eval_async(
     _eval_async_running = True
+    # if we are called outside of eval() then set display type to "plain"
+    if not display_type_initialized():
+        init_display_type("plain")
     # resolve model and task args
     model_args = resolve_args(model_args)
     task_args = resolve_args(task_args)

inspect_ai/_eval/loader.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import ast
 import contextlib
+import inspect
 import os
 from dataclasses import dataclass, field
 from importlib.machinery import SourceFileLoader
@@ -9,11 +10,13 @@ from pathlib import Path
 from types import ModuleType
 from typing import Any, Callable, cast
+from typing_extensions import overload
 from inspect_ai._eval.task.util import task_file, task_run_dir
 from inspect_ai._util.decorator import parse_decorators
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.logger import warn_once
-from inspect_ai._util.path import chdir_python
+from inspect_ai._util.path import chdir_python, cwd_relative_path
 from inspect_ai._util.registry import (
     RegistryInfo,
     is_registry_object,
@@ -23,6 +26,7 @@ from inspect_ai._util.registry import (
     registry_params,
 )
 from inspect_ai.model import Model, ModelName
+from inspect_ai.solver._bridge import bridge
 from inspect_ai.solver._solver import Solver, SolverSpec
 from inspect_ai.util import SandboxEnvironmentSpec, SandboxEnvironmentType
 from inspect_ai.util._sandbox.environment import resolve_sandbox_environment
@@ -334,6 +338,16 @@ def split_spec(spec: str) -> tuple[str, str | None]:
         return spec, None
+@overload
+def load_module(
+    module_path: Path, filter: Callable[[str], bool]
+) -> ModuleType | None: ...
+@overload
+def load_module(module_path: Path, filter: None = None) -> ModuleType: ...
 def load_module(
     module_path: Path, filter: Callable[[str], bool] | None = None
 ) -> ModuleType | None:
@@ -425,28 +439,74 @@ def solver_from_spec(spec: SolverSpec) -> Solver:
         else contextlib.nullcontext()
     )
+    # pretty solver name for error messages
+    pretty_solver_file = (
+        cwd_relative_path(solver_file.as_posix()) if solver_file else None
+    )
     with create_cm:
-        # if we have a file then we need to load it and (if required) determine the solver name
-        if solver_file is not None:
-            # load the module so that registry_create works
-            load_module(solver_file)
+        # if there is no solver file then just create from the registry by name
+        if solver_file is None:
+            if solver_name is None:
+                raise ValueError(f"Unable to resolve solver name from {spec.solver}")
+            return cast(Solver, registry_create("solver", solver_name, **spec.args))
-            # if there is no solver_name we need to discover the first @solver
+        # we do have a solver file
+        else:
+            # load the module and parse decorators
+            solver_module = load_module(solver_file)
+            decorators = parse_decorators(solver_file, "solver")
+            # if there is no solver_name see if we can discover it
             if solver_name is None:
-                solvers = parse_decorators(solver_file, "solver")
-                if len(solvers) == 0:
+                if len(decorators) == 1:
+                    # decorator based solver
+                    solver_name = decorators[0][0]
+                elif len(decorators) == 0:
+                    # see if we can find an agent based solver
+                    functions = [
+                        function
+                        for function in inspect.getmembers(
+                            solver_module, inspect.isfunction
+                        )
+                        if function[1].__module__ == solver_module.__name__
+                    ]
+                    agent_functions = [
+                        function
+                        for function in functions
+                        if "agent" in function[0] and not function[0].startswith("_")
+                    ]
+                    if len(agent_functions) == 1:
+                        # agent based solver
+                        solver_name = agent_functions[0][0]
+                    elif len(agent_functions) == 0:
+                        raise PrerequisiteError(
+                            f"The source file {pretty_solver_file} does not contain any @solver functions or agent functions."
+                        )
+                    else:
+                        raise PrerequisiteError(
+                            f"The source file {pretty_solver_file} has more than one agent function (qualify which agent using e.g. '{solver_file.name}@agent_fn')"
+                        )
+                else:
                     raise PrerequisiteError(
-                        f"The source file {solver_file.as_posix()} does not contain any @solver functions."
+                        f"The source file {pretty_solver_file} has more than one @solver function (qualify which solver using e.g. '{solver_file.name}y@solver_fn')"
                     )
-                if len(solvers) > 1:
-                    raise PrerequisiteError(
-                        f"The source file {solver_file.as_posix()} has more than one @solver function (qualify which solver using file.py@solver)"
-                    )
-                solver_name = solvers[0][0]
-        # make mypy happy and catch unexpected branching
-        if solver_name is None:
-            raise ValueError(f"Unable to resolve solver name from {spec.solver}")
+            # create decorator based solvers using the registry
+            if any(solver[0] == solver_name for solver in decorators):
+                return cast(Solver, registry_create("solver", solver_name, **spec.args))
-        solver = cast(Solver, registry_create("solver", solver_name, **spec.args))
-        return solver
+            # create agent based solvers by calling the function and wrapping it in bridge()
+            else:
+                agent_fn = getattr(solver_module, solver_name, None)
+                if inspect.isfunction(agent_fn):
+                    return bridge(agent_fn(**spec.args))
+                elif agent_fn is not None:
+                    raise PrerequisiteError(
+                        f"The object {solver_name} in file {pretty_solver_file} is not a Python function."
+                    )
+                else:
+                    raise PrerequisiteError(
+                        f"The function {solver_name} was not found in file {pretty_solver_file}."
+                    )

inspect_ai/_eval/registry.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import inspect
 import logging
 from copy import deepcopy
+from functools import wraps
 from pathlib import Path
 from typing import Any, Callable, TypeVar, cast, overload
@@ -125,6 +126,7 @@ def task(*args: Any, name: str | None = None, **attribs: Any) -> Any:
         params = list(inspect.signature(task_type).parameters.keys())
         # Create and return the wrapper function
+        @wraps(task_type)
         def wrapper(*w_args: Any, **w_kwargs: Any) -> Task:
             # Create the task
             task_instance = task_type(*w_args, **w_kwargs)
@@ -154,6 +156,10 @@ def task(*args: Any, name: str | None = None, **attribs: Any) -> Any:
             # Return the task instance
             return task_instance
+        # functools.wraps overrides the return type annotation of the inner function, so
+        # we explicitly set it again
+        wrapper.__annotations__["return"] = Task
         # Register the task and return the wrapper
         return task_register(
             task=cast(TaskType, wrapper), name=task_name, attribs=attribs, params=params

inspect_ai/_eval/score.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Callable, cast
 from inspect_ai._display import display
 from inspect_ai._util.path import chdir_python
 from inspect_ai._util.platform import platform_init
-from inspect_ai._util.registry import registry_create
+from inspect_ai._util.registry import registry_create, registry_unqualified_name
 from inspect_ai.log import (
     EvalLog,
     EvalMetric,
@@ -185,6 +185,7 @@ async def run_score_task(
         results[scorer_name] = SampleScore(
             score=result,
             sample_id=state.sample_id,
+            scorer=registry_unqualified_name(scorer),
         )
     progress()

inspect_ai/_eval/task/results.py CHANGED Viewed

@@ -65,11 +65,12 @@ def eval_results(
     # extract scorers info from scorers then create scorers info for any
     # scores not already accounted for by a scorer name
     scorers_info = [ScorerInfo.from_scorer(scorer) for scorer in (scorers or [])]
-    scorer_names = [info.name for info in scorers_info]
-    for name in set(key for sample_scores in scores for key in sample_scores):
-        if name not in scorer_names:
-            scorers_info.append(ScorerInfo.from_name(name))
-            scorer_names.append(name)
+    scorer_names = {info.name for info in scorers_info}
+    for sample_scores in scores:
+        for name, sample_score in sample_scores.items():
+            if sample_score.scorer is None and name not in scorer_names:
+                scorers_info.append(ScorerInfo.from_name(name))
+                scorer_names.add(name)
     # record scorer
     if len(scorers_info) > 0:

inspect_ai/_eval/task/run.py CHANGED Viewed

@@ -27,8 +27,12 @@ from inspect_ai._util.constants import (
 from inspect_ai._util.datetime import iso_now
 from inspect_ai._util.error import exception_message
 from inspect_ai._util.hooks import send_telemetry
-from inspect_ai._util.registry import is_registry_object, registry_log_name
-from inspect_ai._util.timeouts import Timeout, timeout, timeout_at
+from inspect_ai._util.registry import (
+    is_registry_object,
+    registry_log_name,
+    registry_unqualified_name,
+)
+from inspect_ai._util.timeouts import Timeout, timeout
 from inspect_ai._view.notify import view_notify_eval
 from inspect_ai.dataset import Dataset, Sample
 from inspect_ai.log import (
@@ -652,20 +656,15 @@ async def task_run_sample(
                 except BaseException as ex:
                     error = handle_error(ex)
-                # set timeout for scoring. if the original timeout was never hit
-                # then just create a new timeout_cm targeting the original
-                # timeout time. if the original timeout was hit we still want
-                # to provide an opportunity for scoring, but we don't necessarily
+                # set timeout for scoring. if the original timeout was hit we still
+                # want to provide opportunity for scoring, but we don't necessarily
                 # want to wait the full timeout again (especially in the case where
                 # the cause of the timeout is a hung container and scoring requires
                 # interacting with the container). as a middle ground we use half
                 # of the original timeout value for scoring.
                 if isinstance(timeout_cm, Timeout):
-                    if not timeout_cm.expired():
-                        timeout_cm = timeout_at(timeout_cm.when())
-                    else:
-                        assert time_limit
-                        timeout_cm = timeout(time_limit / 2)
+                    assert time_limit
+                    timeout_cm = timeout(time_limit / 2)
                 # turn off sample limits
                 set_active_sample_token_limit(None)
@@ -690,6 +689,7 @@ async def task_run_sample(
                                         sample_score = SampleScore(
                                             score=score_result,
                                             sample_id=sample.id,
+                                            scorer=registry_unqualified_name(scorer),
                                         )
                                         transcript()._event(
                                             ScoreEvent(

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl