inspect-ai 0.3.75__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. inspect_ai/_eval/evalset.py +3 -2
  2. inspect_ai/_eval/registry.py +3 -5
  3. inspect_ai/_eval/run.py +4 -0
  4. inspect_ai/_eval/task/run.py +4 -0
  5. inspect_ai/_util/logger.py +3 -0
  6. inspect_ai/_view/www/dist/assets/index.css +28 -16
  7. inspect_ai/_view/www/dist/assets/index.js +4801 -4615
  8. inspect_ai/_view/www/log-schema.json +79 -9
  9. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
  10. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
  11. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
  12. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
  13. inspect_ai/_view/www/src/types/log.d.ts +11 -5
  14. inspect_ai/log/_recorders/json.py +8 -0
  15. inspect_ai/log/_transcript.py +13 -4
  16. inspect_ai/model/_call_tools.py +13 -4
  17. inspect_ai/model/_chat_message.py +3 -0
  18. inspect_ai/model/_model.py +5 -1
  19. inspect_ai/model/_model_output.py +6 -1
  20. inspect_ai/model/_openai.py +11 -6
  21. inspect_ai/model/_providers/anthropic.py +133 -75
  22. inspect_ai/model/_providers/openai.py +11 -8
  23. inspect_ai/model/_providers/vertex.py +5 -2
  24. inspect_ai/tool/__init__.py +4 -0
  25. inspect_ai/tool/_tool_call.py +5 -2
  26. inspect_ai/tool/_tool_support_helpers.py +200 -0
  27. inspect_ai/tool/_tools/_bash_session.py +119 -0
  28. inspect_ai/tool/_tools/_computer/_computer.py +1 -1
  29. inspect_ai/tool/_tools/_text_editor.py +121 -0
  30. inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
  31. inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
  32. inspect_ai/tool/_tools/_web_search.py +1 -1
  33. inspect_ai/util/_json.py +28 -0
  34. inspect_ai/util/_sandbox/context.py +16 -7
  35. inspect_ai/util/_sandbox/docker/config.py +1 -1
  36. inspect_ai/util/_sandbox/docker/internal.py +3 -3
  37. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +5 -2
  38. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +42 -68
  39. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
  40. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
  41. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
  42. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
  43. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
  44. inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
  45. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
  46. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
  47. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
  48. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
  49. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
  50. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
  51. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
  52. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
  53. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
  54. inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
  55. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
  56. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
  57. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
  58. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
  59. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
  60. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
  61. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
  62. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
  63. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
  64. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
  65. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
  66. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
  67. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
  68. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
  69. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
  70. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
  71. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
  72. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/evalset.py CHANGED
@@ -35,7 +35,7 @@ from inspect_ai.model import (
35
35
  from inspect_ai.model._generate_config import GenerateConfig
36
36
  from inspect_ai.solver._solver import Solver, SolverSpec
37
37
  from inspect_ai.util import DisplayType, SandboxEnvironmentType
38
- from inspect_ai.util._display import init_display_type
38
+ from inspect_ai.util._display import display_type_initialized, init_display_type
39
39
 
40
40
  from .eval import eval, eval_init
41
41
  from .loader import resolve_task_args
@@ -234,7 +234,8 @@ def eval_set(
234
234
  return results
235
235
 
236
236
  # initialise display (otherwise eval_init will set it to full)
237
- display = init_display_type(display)
237
+ if not display_type_initialized():
238
+ display = init_display_type(display)
238
239
  if display == "conversation":
239
240
  raise RuntimeError("eval_set cannot be used with conversation display.")
240
241
 
inspect_ai/_eval/registry.py CHANGED
@@ -75,12 +75,10 @@ def task_create(name: str, **kwargs: Any) -> Task:
75
75
  task_params: list[str] = task_info.metadata["params"]
76
76
  task_args: dict[str, Any] = {}
77
77
  for param in kwargs.keys():
78
- if param in task_params:
78
+ if param in task_params or "kwargs" in task_params:
79
79
  task_args[param] = kwargs[param]
80
- if "kwargs" in task_params:
81
- task_args[param] = kwargs[param]
82
- else:
83
- logger.warning(f"param '{param}' not used by task '{name}'")
80
+ else:
81
+ logger.warning(f"param '{param}' not used by task '{name}'")
84
82
 
85
83
  return cast(Task, registry_create("task", name, **task_args))
86
84
 
inspect_ai/_eval/run.py CHANGED
@@ -4,6 +4,7 @@ import os
4
4
  import sys
5
5
  from typing import Awaitable, Callable, Set, cast
6
6
 
7
+ from inspect_ai._eval.task.task import Task
7
8
  from inspect_ai._util.trace import trace_action
8
9
 
9
10
  if sys.version_info < (3, 11):
@@ -81,6 +82,7 @@ async def eval_run(
81
82
  eval_wd = os.getcwd()
82
83
 
83
84
  # ensure sample ids
85
+ task: Task | None = None
84
86
  for resolved_task in tasks:
85
87
  # add sample ids to dataset if they aren't there (start at 1 not 0)
86
88
  task = resolved_task.task
@@ -91,6 +93,8 @@ async def eval_run(
91
93
  # Ensure sample ids are unique
92
94
  ensure_unique_ids(task.dataset)
93
95
 
96
+ assert task, "Must encounter a task"
97
+
94
98
  # run startup pass for the sandbox environments
95
99
  shutdown_sandbox_environments: Callable[[], Awaitable[None]] | None = None
96
100
  if has_sandbox:
inspect_ai/_eval/task/run.py CHANGED
@@ -599,6 +599,10 @@ async def task_run_sample(
599
599
  )
600
600
 
601
601
  async with sandboxenv_cm:
602
+ timeout_cm: (
603
+ contextlib._GeneratorContextManager[anyio.CancelScope, None, None]
604
+ | contextlib.nullcontext[None]
605
+ ) = contextlib.nullcontext()
602
606
  try:
603
607
  # update active sample with sandboxes now that we are initialised
604
608
  active.sandboxes = await sandbox_connections()
inspect_ai/_util/logger.py CHANGED
@@ -150,6 +150,9 @@ def init_logger(log_level: str | None, log_level_transcript: str | None = None)
150
150
  transcript_levelno=transcript_levelno,
151
151
  )
152
152
 
153
+ # set the global log level
154
+ getLogger().setLevel(log_level)
155
+
153
156
  # set the log level for our package
154
157
  getLogger(PKG_NAME).setLevel(capture_level)
155
158
  getLogger(PKG_NAME).addHandler(_logHandler)
inspect_ai/_view/www/dist/assets/index.css CHANGED
@@ -16461,44 +16461,44 @@ ul.jsondiffpatch-textdiff {
16461
16461
  font-weight: 600;
16462
16462
  padding-bottom: 0.3em;
16463
16463
  }
16464
- ._output_3axgd_1 {
16464
+ ._output_15urk_1 {
16465
16465
  padding-top: 1em;
16466
16466
  }
16467
16467
 
16468
- ._container_3axgd_5 {
16468
+ ._container_15urk_5 {
16469
16469
  margin: 0.5em 0;
16470
16470
  width: 100%;
16471
16471
  }
16472
16472
 
16473
- ._all_3axgd_10 {
16473
+ ._all_15urk_10 {
16474
16474
  display: grid;
16475
16475
  grid-template-columns: 1fr 1fr 1fr;
16476
16476
  column-gap: 1em;
16477
16477
  }
16478
16478
 
16479
- ._tableSelection_3axgd_16 {
16479
+ ._tableSelection_15urk_16 {
16480
16480
  width: fit-content;
16481
16481
  align-self: start;
16482
16482
  justify-self: start;
16483
16483
  }
16484
16484
 
16485
- ._tools_3axgd_22 {
16485
+ ._tools_15urk_22 {
16486
16486
  grid-column: -1/1;
16487
16487
  }
16488
16488
 
16489
- ._codePre_3axgd_26 {
16489
+ ._codePre_15urk_26 {
16490
16490
  background: var(--bs-light);
16491
16491
  width: 100%;
16492
16492
  padding: 0.5em;
16493
16493
  border-radius: var(--bs-border-radius);
16494
16494
  }
16495
16495
 
16496
- ._code_3axgd_26 {
16497
- white-space: pre-wrap;
16498
- word-wrap: anywhere;
16496
+ ._code_15urk_26 {
16497
+ white-space: pre-wrap !important;
16498
+ word-wrap: anywhere !important;
16499
16499
  }
16500
16500
 
16501
- ._toolConfig_3axgd_38 {
16501
+ ._toolConfig_15urk_38 {
16502
16502
  display: grid;
16503
16503
  grid-template-columns: max-content auto;
16504
16504
  column-gap: 1em;
@@ -17032,12 +17032,14 @@ div.ap-player div.ap-control-bar * {
17032
17032
  div.ap-control-bar svg.ap-icon path {
17033
17033
  fill: var(--term-color-foreground);
17034
17034
  }
17035
- div.ap-control-bar span.ap-playback-button {
17035
+ div.ap-control-bar span.ap-button {
17036
17036
  display: flex;
17037
17037
  flex: 0 0 auto;
17038
17038
  cursor: pointer;
17039
- height: 12px;
17039
+ }
17040
+ div.ap-control-bar span.ap-playback-button {
17040
17041
  width: 12px;
17042
+ height: 12px;
17041
17043
  padding: 10px;
17042
17044
  }
17043
17045
  div.ap-control-bar span.ap-playback-button svg {
@@ -17104,13 +17106,9 @@ div.ap-control-bar.ap-seekable .ap-progressbar .ap-bar {
17104
17106
  cursor: pointer;
17105
17107
  }
17106
17108
  div.ap-control-bar .ap-fullscreen-button {
17107
- display: block;
17108
- flex: 0 0 auto;
17109
17109
  width: 14px;
17110
17110
  height: 14px;
17111
17111
  padding: 9px;
17112
- cursor: pointer;
17113
- position: relative;
17114
17112
  }
17115
17113
  div.ap-control-bar .ap-fullscreen-button svg {
17116
17114
  width: 14px;
@@ -17127,6 +17125,20 @@ div.ap-control-bar .ap-fullscreen-button .ap-tooltip {
17127
17125
  left: initial;
17128
17126
  transform: none;
17129
17127
  }
17128
+ div.ap-control-bar .ap-kbd-button {
17129
+ height: 14px;
17130
+ padding: 9px;
17131
+ margin: 0 4px;
17132
+ }
17133
+ div.ap-control-bar .ap-kbd-button svg {
17134
+ width: 26px;
17135
+ height: 14px;
17136
+ }
17137
+ div.ap-control-bar .ap-kbd-button .ap-tooltip {
17138
+ right: 5px;
17139
+ left: initial;
17140
+ transform: none;
17141
+ }
17130
17142
  div.ap-wrapper.ap-hud .ap-control-bar {
17131
17143
  opacity: 1;
17132
17144
  }