inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. inspect_ai/_cli/eval.py +0 -7
  2. inspect_ai/_display/textual/widgets/samples.py +1 -1
  3. inspect_ai/_eval/eval.py +10 -1
  4. inspect_ai/_eval/loader.py +79 -19
  5. inspect_ai/_eval/registry.py +6 -0
  6. inspect_ai/_eval/score.py +2 -1
  7. inspect_ai/_eval/task/results.py +6 -5
  8. inspect_ai/_eval/task/run.py +11 -11
  9. inspect_ai/_view/www/dist/assets/index.js +262 -303
  10. inspect_ai/_view/www/src/App.mjs +6 -6
  11. inspect_ai/_view/www/src/Types.mjs +1 -1
  12. inspect_ai/_view/www/src/api/Types.ts +133 -0
  13. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  14. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  15. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  16. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  17. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  18. inspect_ai/_view/www/src/api/index.ts +51 -0
  19. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  20. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  21. inspect_ai/_view/www/src/index.js +2 -2
  22. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  23. inspect_ai/_view/www/src/navbar/Navbar.mjs +1 -1
  24. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +1 -1
  25. inspect_ai/_view/www/src/samples/SampleList.mjs +1 -1
  26. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  27. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +14 -14
  28. inspect_ai/_view/www/src/samples/SamplesTab.mjs +10 -10
  29. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  30. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +1 -3
  31. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  32. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  33. inspect_ai/approval/_human/manager.py +1 -1
  34. inspect_ai/model/_call_tools.py +55 -0
  35. inspect_ai/model/_conversation.py +1 -4
  36. inspect_ai/model/_generate_config.py +2 -8
  37. inspect_ai/model/_model_output.py +15 -0
  38. inspect_ai/model/_openai.py +383 -0
  39. inspect_ai/model/_providers/anthropic.py +52 -11
  40. inspect_ai/model/_providers/azureai.py +1 -1
  41. inspect_ai/model/_providers/goodfire.py +248 -0
  42. inspect_ai/model/_providers/groq.py +7 -3
  43. inspect_ai/model/_providers/hf.py +6 -0
  44. inspect_ai/model/_providers/mistral.py +2 -1
  45. inspect_ai/model/_providers/openai.py +36 -202
  46. inspect_ai/model/_providers/openai_o1.py +2 -4
  47. inspect_ai/model/_providers/providers.py +22 -0
  48. inspect_ai/model/_providers/together.py +4 -4
  49. inspect_ai/model/_providers/util/__init__.py +2 -3
  50. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  51. inspect_ai/model/_providers/util/llama31.py +1 -1
  52. inspect_ai/model/_providers/util/util.py +0 -76
  53. inspect_ai/scorer/_metric.py +3 -0
  54. inspect_ai/scorer/_scorer.py +2 -1
  55. inspect_ai/solver/__init__.py +2 -0
  56. inspect_ai/solver/_basic_agent.py +1 -1
  57. inspect_ai/solver/_bridge/__init__.py +3 -0
  58. inspect_ai/solver/_bridge/bridge.py +100 -0
  59. inspect_ai/solver/_bridge/patch.py +170 -0
  60. inspect_ai/solver/_solver.py +6 -0
  61. inspect_ai/util/_display.py +5 -0
  62. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  63. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  64. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +68 -63
  65. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  66. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  67. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  68. inspect_ai/_view/www/src/api/index.mjs +0 -49
  69. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  70. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  71. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  72. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  73. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py CHANGED
@@ -314,12 +314,6 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
314
314
  help="Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
315
315
  envvar="INSPECT_EVAL_STOP_SEQS",
316
316
  )
317
- @click.option(
318
- "--suffix",
319
- type=str,
320
- help="The suffix that comes after a completion of inserted text. OpenAI only.",
321
- envvar="INSPECT_EVAL_SUFFIX",
322
- )
323
317
  @click.option(
324
318
  "--temperature",
325
319
  type=float,
@@ -439,7 +433,6 @@ def eval_command(
439
433
  logit_bias: str | None,
440
434
  seed: int | None,
441
435
  stop_seqs: str | None,
442
- suffix: str | None,
443
436
  temperature: float | None,
444
437
  top_p: float | None,
445
438
  top_k: int | None,
inspect_ai/_display/textual/widgets/samples.py CHANGED
@@ -413,7 +413,7 @@ class SampleToolbar(Horizontal):
413
413
  grid-columns: auto auto 1fr auto auto;
414
414
  }}
415
415
  SampleToolbar #{STATUS_GROUP} {{
416
- min-width: 20;
416
+ width: 22;
417
417
  }}
418
418
  SampleToolbar Button {{
419
419
  margin-bottom: 1;
inspect_ai/_eval/eval.py CHANGED
@@ -35,7 +35,12 @@ from inspect_ai.scorer._reducer import reducer_log_names
35
35
  from inspect_ai.solver._chain import chain
36
36
  from inspect_ai.solver._solver import Solver, SolverSpec
37
37
  from inspect_ai.util import SandboxEnvironmentType
38
- from inspect_ai.util._display import DisplayType, display_type, init_display_type
38
+ from inspect_ai.util._display import (
39
+ DisplayType,
40
+ display_type,
41
+ display_type_initialized,
42
+ init_display_type,
43
+ )
39
44
 
40
45
  from .context import init_eval_context
41
46
  from .loader import ResolvedTask, resolve_tasks
@@ -306,6 +311,10 @@ async def eval_async(
306
311
 
307
312
  _eval_async_running = True
308
313
 
314
+ # if we are called outside of eval() then set display type to "plain"
315
+ if not display_type_initialized():
316
+ init_display_type("plain")
317
+
309
318
  # resolve model and task args
310
319
  model_args = resolve_args(model_args)
311
320
  task_args = resolve_args(task_args)
inspect_ai/_eval/loader.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import ast
2
2
  import contextlib
3
+ import inspect
3
4
  import os
4
5
  from dataclasses import dataclass, field
5
6
  from importlib.machinery import SourceFileLoader
@@ -9,11 +10,13 @@ from pathlib import Path
9
10
  from types import ModuleType
10
11
  from typing import Any, Callable, cast
11
12
 
13
+ from typing_extensions import overload
14
+
12
15
  from inspect_ai._eval.task.util import task_file, task_run_dir
13
16
  from inspect_ai._util.decorator import parse_decorators
14
17
  from inspect_ai._util.error import PrerequisiteError
15
18
  from inspect_ai._util.logger import warn_once
16
- from inspect_ai._util.path import chdir_python
19
+ from inspect_ai._util.path import chdir_python, cwd_relative_path
17
20
  from inspect_ai._util.registry import (
18
21
  RegistryInfo,
19
22
  is_registry_object,
@@ -23,6 +26,7 @@ from inspect_ai._util.registry import (
23
26
  registry_params,
24
27
  )
25
28
  from inspect_ai.model import Model, ModelName
29
+ from inspect_ai.solver._bridge import bridge
26
30
  from inspect_ai.solver._solver import Solver, SolverSpec
27
31
  from inspect_ai.util import SandboxEnvironmentSpec, SandboxEnvironmentType
28
32
  from inspect_ai.util._sandbox.environment import resolve_sandbox_environment
@@ -334,6 +338,16 @@ def split_spec(spec: str) -> tuple[str, str | None]:
334
338
  return spec, None
335
339
 
336
340
 
341
+ @overload
342
+ def load_module(
343
+ module_path: Path, filter: Callable[[str], bool]
344
+ ) -> ModuleType | None: ...
345
+
346
+
347
+ @overload
348
+ def load_module(module_path: Path, filter: None = None) -> ModuleType: ...
349
+
350
+
337
351
  def load_module(
338
352
  module_path: Path, filter: Callable[[str], bool] | None = None
339
353
  ) -> ModuleType | None:
@@ -425,28 +439,74 @@ def solver_from_spec(spec: SolverSpec) -> Solver:
425
439
  else contextlib.nullcontext()
426
440
  )
427
441
 
442
+ # pretty solver name for error messages
443
+ pretty_solver_file = (
444
+ cwd_relative_path(solver_file.as_posix()) if solver_file else None
445
+ )
446
+
428
447
  with create_cm:
429
- # if we have a file then we need to load it and (if required) determine the solver name
430
- if solver_file is not None:
431
- # load the module so that registry_create works
432
- load_module(solver_file)
448
+ # if there is no solver file then just create from the registry by name
449
+ if solver_file is None:
450
+ if solver_name is None:
451
+ raise ValueError(f"Unable to resolve solver name from {spec.solver}")
452
+ return cast(Solver, registry_create("solver", solver_name, **spec.args))
433
453
 
434
- # if there is no solver_name we need to discover the first @solver
454
+ # we do have a solver file
455
+ else:
456
+ # load the module and parse decorators
457
+ solver_module = load_module(solver_file)
458
+ decorators = parse_decorators(solver_file, "solver")
459
+
460
+ # if there is no solver_name see if we can discover it
435
461
  if solver_name is None:
436
- solvers = parse_decorators(solver_file, "solver")
437
- if len(solvers) == 0:
462
+ if len(decorators) == 1:
463
+ # decorator based solver
464
+ solver_name = decorators[0][0]
465
+ elif len(decorators) == 0:
466
+ # see if we can find an agent based solver
467
+ functions = [
468
+ function
469
+ for function in inspect.getmembers(
470
+ solver_module, inspect.isfunction
471
+ )
472
+ if function[1].__module__ == solver_module.__name__
473
+ ]
474
+ agent_functions = [
475
+ function
476
+ for function in functions
477
+ if "agent" in function[0] and not function[0].startswith("_")
478
+ ]
479
+ if len(agent_functions) == 1:
480
+ # agent based solver
481
+ solver_name = agent_functions[0][0]
482
+
483
+ elif len(agent_functions) == 0:
484
+ raise PrerequisiteError(
485
+ f"The source file {pretty_solver_file} does not contain any @solver functions or agent functions."
486
+ )
487
+ else:
488
+ raise PrerequisiteError(
489
+ f"The source file {pretty_solver_file} has more than one agent function (qualify which agent using e.g. '{solver_file.name}@agent_fn')"
490
+ )
491
+ else:
438
492
  raise PrerequisiteError(
439
- f"The source file {solver_file.as_posix()} does not contain any @solver functions."
493
+ f"The source file {pretty_solver_file} has more than one @solver function (qualify which solver using e.g. '{solver_file.name}y@solver_fn')"
440
494
  )
441
- if len(solvers) > 1:
442
- raise PrerequisiteError(
443
- f"The source file {solver_file.as_posix()} has more than one @solver function (qualify which solver using file.py@solver)"
444
- )
445
- solver_name = solvers[0][0]
446
495
 
447
- # make mypy happy and catch unexpected branching
448
- if solver_name is None:
449
- raise ValueError(f"Unable to resolve solver name from {spec.solver}")
496
+ # create decorator based solvers using the registry
497
+ if any(solver[0] == solver_name for solver in decorators):
498
+ return cast(Solver, registry_create("solver", solver_name, **spec.args))
450
499
 
451
- solver = cast(Solver, registry_create("solver", solver_name, **spec.args))
452
- return solver
500
+ # create agent based solvers by calling the function and wrapping it in bridge()
501
+ else:
502
+ agent_fn = getattr(solver_module, solver_name, None)
503
+ if inspect.isfunction(agent_fn):
504
+ return bridge(agent_fn(**spec.args))
505
+ elif agent_fn is not None:
506
+ raise PrerequisiteError(
507
+ f"The object {solver_name} in file {pretty_solver_file} is not a Python function."
508
+ )
509
+ else:
510
+ raise PrerequisiteError(
511
+ f"The function {solver_name} was not found in file {pretty_solver_file}."
512
+ )
inspect_ai/_eval/registry.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import inspect
2
2
  import logging
3
3
  from copy import deepcopy
4
+ from functools import wraps
4
5
  from pathlib import Path
5
6
  from typing import Any, Callable, TypeVar, cast, overload
6
7
 
@@ -125,6 +126,7 @@ def task(*args: Any, name: str | None = None, **attribs: Any) -> Any:
125
126
  params = list(inspect.signature(task_type).parameters.keys())
126
127
 
127
128
  # Create and return the wrapper function
129
+ @wraps(task_type)
128
130
  def wrapper(*w_args: Any, **w_kwargs: Any) -> Task:
129
131
  # Create the task
130
132
  task_instance = task_type(*w_args, **w_kwargs)
@@ -154,6 +156,10 @@ def task(*args: Any, name: str | None = None, **attribs: Any) -> Any:
154
156
  # Return the task instance
155
157
  return task_instance
156
158
 
159
+ # functools.wraps overrides the return type annotation of the inner function, so
160
+ # we explicitly set it again
161
+ wrapper.__annotations__["return"] = Task
162
+
157
163
  # Register the task and return the wrapper
158
164
  return task_register(
159
165
  task=cast(TaskType, wrapper), name=task_name, attribs=attribs, params=params
inspect_ai/_eval/score.py CHANGED
@@ -5,7 +5,7 @@ from typing import Callable, cast
5
5
  from inspect_ai._display import display
6
6
  from inspect_ai._util.path import chdir_python
7
7
  from inspect_ai._util.platform import platform_init
8
- from inspect_ai._util.registry import registry_create
8
+ from inspect_ai._util.registry import registry_create, registry_unqualified_name
9
9
  from inspect_ai.log import (
10
10
  EvalLog,
11
11
  EvalMetric,
@@ -185,6 +185,7 @@ async def run_score_task(
185
185
  results[scorer_name] = SampleScore(
186
186
  score=result,
187
187
  sample_id=state.sample_id,
188
+ scorer=registry_unqualified_name(scorer),
188
189
  )
189
190
 
190
191
  progress()
inspect_ai/_eval/task/results.py CHANGED
@@ -65,11 +65,12 @@ def eval_results(
65
65
  # extract scorers info from scorers then create scorers info for any
66
66
  # scores not already accounted for by a scorer name
67
67
  scorers_info = [ScorerInfo.from_scorer(scorer) for scorer in (scorers or [])]
68
- scorer_names = [info.name for info in scorers_info]
69
- for name in set(key for sample_scores in scores for key in sample_scores):
70
- if name not in scorer_names:
71
- scorers_info.append(ScorerInfo.from_name(name))
72
- scorer_names.append(name)
68
+ scorer_names = {info.name for info in scorers_info}
69
+ for sample_scores in scores:
70
+ for name, sample_score in sample_scores.items():
71
+ if sample_score.scorer is None and name not in scorer_names:
72
+ scorers_info.append(ScorerInfo.from_name(name))
73
+ scorer_names.add(name)
73
74
 
74
75
  # record scorer
75
76
  if len(scorers_info) > 0:
inspect_ai/_eval/task/run.py CHANGED
@@ -27,8 +27,12 @@ from inspect_ai._util.constants import (
27
27
  from inspect_ai._util.datetime import iso_now
28
28
  from inspect_ai._util.error import exception_message
29
29
  from inspect_ai._util.hooks import send_telemetry
30
- from inspect_ai._util.registry import is_registry_object, registry_log_name
31
- from inspect_ai._util.timeouts import Timeout, timeout, timeout_at
30
+ from inspect_ai._util.registry import (
31
+ is_registry_object,
32
+ registry_log_name,
33
+ registry_unqualified_name,
34
+ )
35
+ from inspect_ai._util.timeouts import Timeout, timeout
32
36
  from inspect_ai._view.notify import view_notify_eval
33
37
  from inspect_ai.dataset import Dataset, Sample
34
38
  from inspect_ai.log import (
@@ -652,20 +656,15 @@ async def task_run_sample(
652
656
  except BaseException as ex:
653
657
  error = handle_error(ex)
654
658
 
655
- # set timeout for scoring. if the original timeout was never hit
656
- # then just create a new timeout_cm targeting the original
657
- # timeout time. if the original timeout was hit we still want
658
- # to provide an opportunity for scoring, but we don't necessarily
659
+ # set timeout for scoring. if the original timeout was hit we still
660
+ # want to provide opportunity for scoring, but we don't necessarily
659
661
  # want to wait the full timeout again (especially in the case where
660
662
  # the cause of the timeout is a hung container and scoring requires
661
663
  # interacting with the container). as a middle ground we use half
662
664
  # of the original timeout value for scoring.
663
665
  if isinstance(timeout_cm, Timeout):
664
- if not timeout_cm.expired():
665
- timeout_cm = timeout_at(timeout_cm.when())
666
- else:
667
- assert time_limit
668
- timeout_cm = timeout(time_limit / 2)
666
+ assert time_limit
667
+ timeout_cm = timeout(time_limit / 2)
669
668
 
670
669
  # turn off sample limits
671
670
  set_active_sample_token_limit(None)
@@ -690,6 +689,7 @@ async def task_run_sample(
690
689
  sample_score = SampleScore(
691
690
  score=score_result,
692
691
  sample_id=sample.id,
692
+ scorer=registry_unqualified_name(scorer),
693
693
  )
694
694
  transcript()._event(
695
695
  ScoreEvent(