inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +3 -3
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +65 -39
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/server.py +17 -0
  28. inspect_ai/_view/www/dist/assets/index.css +93 -31
  29. inspect_ai/_view/www/dist/assets/index.js +10639 -10011
  30. inspect_ai/_view/www/log-schema.json +418 -1
  31. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  32. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  36. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  37. inspect_ai/_view/www/package.json +2 -2
  38. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  39. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  40. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  41. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  42. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  43. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  48. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  49. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  50. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  51. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  55. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  56. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  57. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  58. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  59. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  60. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  61. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  62. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
  63. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
  64. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
  65. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
  66. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  67. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  69. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  71. inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
  72. inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
  73. inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
  74. inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
  75. inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
  76. inspect_ai/_view/www/src/client/api/types.ts +3 -0
  77. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  78. inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
  79. inspect_ai/_view/www/src/tests/README.md +2 -2
  80. inspect_ai/_view/www/src/utils/git.ts +3 -1
  81. inspect_ai/_view/www/src/utils/html.ts +6 -0
  82. inspect_ai/agent/_handoff.py +8 -5
  83. inspect_ai/agent/_react.py +5 -5
  84. inspect_ai/dataset/_dataset.py +1 -1
  85. inspect_ai/log/_condense.py +5 -0
  86. inspect_ai/log/_file.py +4 -1
  87. inspect_ai/log/_log.py +9 -4
  88. inspect_ai/log/_recorders/json.py +4 -2
  89. inspect_ai/log/_samples.py +5 -0
  90. inspect_ai/log/_util.py +2 -0
  91. inspect_ai/model/__init__.py +14 -0
  92. inspect_ai/model/_call_tools.py +17 -8
  93. inspect_ai/model/_chat_message.py +3 -0
  94. inspect_ai/model/_openai_responses.py +80 -34
  95. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  96. inspect_ai/model/_providers/_google_citations.py +100 -0
  97. inspect_ai/model/_providers/anthropic.py +219 -36
  98. inspect_ai/model/_providers/google.py +98 -22
  99. inspect_ai/model/_providers/mistral.py +20 -7
  100. inspect_ai/model/_providers/openai.py +11 -10
  101. inspect_ai/model/_providers/openai_compatible.py +3 -2
  102. inspect_ai/model/_providers/openai_responses.py +2 -5
  103. inspect_ai/model/_providers/perplexity.py +123 -0
  104. inspect_ai/model/_providers/providers.py +13 -2
  105. inspect_ai/model/_providers/vertex.py +3 -0
  106. inspect_ai/model/_trim.py +5 -0
  107. inspect_ai/tool/__init__.py +14 -0
  108. inspect_ai/tool/_mcp/_mcp.py +5 -2
  109. inspect_ai/tool/_mcp/sampling.py +19 -3
  110. inspect_ai/tool/_mcp/server.py +1 -1
  111. inspect_ai/tool/_tool.py +10 -1
  112. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  113. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  114. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  115. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  116. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  117. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  118. inspect_ai/util/__init__.py +8 -0
  119. inspect_ai/util/_background.py +64 -0
  120. inspect_ai/util/_display.py +11 -2
  121. inspect_ai/util/_limit.py +72 -5
  122. inspect_ai/util/_sandbox/__init__.py +2 -0
  123. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  124. inspect_ai/util/_sandbox/service.py +28 -7
  125. inspect_ai/util/_span.py +12 -1
  126. inspect_ai/util/_subprocess.py +51 -38
  127. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
  128. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
  129. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  130. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  131. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
  132. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
  133. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
  134. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/common.py CHANGED
@@ -60,7 +60,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
60
60
  @click.option(
61
61
  "--display",
62
62
  type=click.Choice(
63
- ["full", "conversation", "rich", "plain", "none"], case_sensitive=False
63
+ ["full", "conversation", "rich", "plain", "log", "none"],
64
+ case_sensitive=False,
64
65
  ),
65
66
  default=DEFAULT_DISPLAY,
66
67
  envvar="INSPECT_DISPLAY",
inspect_ai/_cli/eval.py CHANGED
@@ -641,7 +641,7 @@ def eval_command(
641
641
  @click.option(
642
642
  "--retry-connections",
643
643
  type=float,
644
- help="Reduce max_connections at this rate with each retry (defaults to 0.5)",
644
+ help="Reduce max_connections at this rate with each retry (defaults to 1.0, which results in no reduction).",
645
645
  envvar="INSPECT_EVAL_RETRY_CONNECTIONS",
646
646
  )
647
647
  @click.option(
@@ -966,7 +966,7 @@ def eval_exec(
966
966
  success, _ = eval_set(**params)
967
967
  return success
968
968
  else:
969
- params["log_header_only"] = (True,) # cli invocation doesn't need full log
969
+ params["log_header_only"] = True # cli invocation doesn't need full log
970
970
  eval(**params)
971
971
  return True
972
972
 
inspect_ai/_display/core/active.py CHANGED
@@ -5,6 +5,7 @@ import rich
5
5
 
6
6
  from inspect_ai.util._display import display_type
7
7
 
8
+ from ..log.display import LogDisplay
8
9
  from ..plain.display import PlainDisplay
9
10
  from ..rich.display import RichDisplay
10
11
  from ..textual.display import TextualDisplay
@@ -24,6 +25,8 @@ def display() -> Display:
24
25
  and not rich.get_console().is_jupyter
25
26
  ):
26
27
  _active_display = TextualDisplay()
28
+ elif display_type() == "log":
29
+ _active_display = LogDisplay()
27
30
  else:
28
31
  _active_display = RichDisplay()
29
32
 
inspect_ai/_display/core/config.py CHANGED
@@ -30,6 +30,7 @@ def task_config(
30
30
  config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
31
31
  if profile.tags:
32
32
  config["tags"] = ",".join(profile.tags)
33
+ config["dataset"] = profile.dataset
33
34
  config_print: list[str] = []
34
35
  for name, value in config.items():
35
36
  if name == "approval" and isinstance(value, dict):
inspect_ai/_display/core/panel.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from typing import Tuple
2
2
 
3
3
  import rich
4
- from rich.console import RenderableType
4
+ from rich.console import Group, RenderableType
5
5
  from rich.panel import Panel
6
6
  from rich.table import Table
7
7
  from rich.text import Text
@@ -9,7 +9,7 @@ from rich.text import Text
9
9
  from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH
10
10
  from inspect_ai._util.path import cwd_relative_path
11
11
  from inspect_ai._util.registry import registry_unqualified_name
12
- from inspect_ai.util._display import display_type
12
+ from inspect_ai.util._display import display_type_plain
13
13
 
14
14
  from .display import TaskProfile
15
15
  from .rich import is_vscode_notebook, rich_theme
@@ -27,7 +27,7 @@ def task_panel(
27
27
  log_location: str | None,
28
28
  ) -> RenderableType:
29
29
  # dispatch to plain handler if we are in plain mode
30
- if display_type() == "plain":
30
+ if display_type_plain():
31
31
  return task_panel_plain(
32
32
  profile, show_model, body, subtitle, footer, log_location
33
33
  )
@@ -89,23 +89,31 @@ def task_panel(
89
89
  log_location_relative = log_location
90
90
 
91
91
  root = Table.grid(expand=True)
92
- root.add_column()
92
+ root.add_column(overflow="fold")
93
93
  root.add_row(table)
94
94
  root.add_row()
95
95
  root.add_row(
96
96
  f"[bold][{theme.light}]Log:[/{theme.light}][/bold] "
97
97
  + f"[{theme.link}]{log_location_relative}[/{theme.link}]"
98
98
  )
99
+ root.add_row()
99
100
 
100
- # create panel w/ title
101
- panel = Panel(
102
- root,
103
- title=task_panel_title(profile, show_model),
104
- title_align="left",
105
- width=width,
106
- expand=True,
107
- )
108
- return panel
101
+ panel = Panel(
102
+ task_panel_title(profile, show_model),
103
+ padding=(0, 0),
104
+ width=width,
105
+ height=3,
106
+ expand=True,
107
+ )
108
+ return Group(panel, root)
109
+ else:
110
+ return Panel(
111
+ root,
112
+ title=task_panel_title(profile, show_model),
113
+ title_align="left",
114
+ width=width,
115
+ expand=True,
116
+ )
109
117
 
110
118
 
111
119
  def task_panel_plain(
inspect_ai/_display/core/results.py CHANGED
@@ -18,7 +18,7 @@ from .display import (
18
18
  TaskSuccess,
19
19
  TaskWithResult,
20
20
  )
21
- from .panel import task_panel, task_targets
21
+ from .panel import task_panel
22
22
  from .rich import rich_theme
23
23
 
24
24
 
@@ -41,8 +41,6 @@ def task_result_cancelled(
41
41
  ) -> RenderableType:
42
42
  # The contents of the panel
43
43
  config = task_config(profile)
44
- targets = task_targets(profile)
45
- subtitle = config, targets
46
44
  body = task_stats(cancelled.stats)
47
45
 
48
46
  # The panel
@@ -50,7 +48,7 @@ def task_result_cancelled(
50
48
  profile=profile,
51
49
  show_model=True,
52
50
  body=body,
53
- subtitle=subtitle,
51
+ subtitle=config,
54
52
  footer=task_interrupted(profile, cancelled.samples_completed),
55
53
  log_location=profile.log_location,
56
54
  )
@@ -76,8 +74,6 @@ def task_results(profile: TaskProfile, success: TaskSuccess) -> RenderableType:
76
74
  def task_result_summary(profile: TaskProfile, success: TaskSuccess) -> RenderableType:
77
75
  # The contents of the panel
78
76
  config = task_config(profile)
79
- targets = task_targets(profile)
80
- subtitle = config, targets
81
77
  body = task_stats(success.stats)
82
78
 
83
79
  # the panel
@@ -85,7 +81,7 @@ def task_result_summary(profile: TaskProfile, success: TaskSuccess) -> Renderabl
85
81
  profile=profile,
86
82
  show_model=True,
87
83
  body=body,
88
- subtitle=subtitle,
84
+ subtitle=config,
89
85
  footer=task_results(profile, success),
90
86
  log_location=profile.log_location,
91
87
  )
inspect_ai/_display/core/rich.py CHANGED
@@ -11,7 +11,7 @@ from typing_extensions import override
11
11
 
12
12
  from inspect_ai._util.platform import is_running_in_jupyterlab, is_running_in_vscode
13
13
  from inspect_ai._util.transcript import transcript_code_theme
14
- from inspect_ai.util._display import display_type
14
+ from inspect_ai.util._display import display_type, display_type_plain
15
15
 
16
16
 
17
17
  def is_vscode_notebook(console: Console) -> bool:
@@ -20,15 +20,13 @@ def is_vscode_notebook(console: Console) -> bool:
20
20
 
21
21
  def rich_no_color() -> bool:
22
22
  return (
23
- display_type() == "plain"
24
- or not is_running_in_vscode()
25
- or is_running_in_jupyterlab()
23
+ display_type_plain() or not is_running_in_vscode() or is_running_in_jupyterlab()
26
24
  )
27
25
 
28
26
 
29
27
  def rich_initialise() -> None:
30
28
  # reflect ansi prefs
31
- if display_type() == "plain":
29
+ if display_type_plain():
32
30
  rich.reconfigure(no_color=True, force_terminal=False, force_interactive=False)
33
31
  elif rich_no_color():
34
32
  rich.reconfigure(no_color=True)
inspect_ai/_display/log/__init__.py ADDED
File without changes
inspect_ai/_display/log/display.py ADDED
@@ -0,0 +1,173 @@
1
+ import contextlib
2
+ import logging
3
+ from typing import AsyncIterator, Callable, Coroutine, Iterator
4
+
5
+ import anyio
6
+ from rich.console import Console
7
+
8
+ from inspect_ai._util._async import configured_async_backend, run_coroutine
9
+ from inspect_ai._util.platform import running_in_notebook
10
+
11
+ from ...util import throttle
12
+ from ...util._concurrency import concurrency_status_display
13
+ from ..core.display import (
14
+ TR,
15
+ Display,
16
+ Progress,
17
+ TaskDisplay,
18
+ TaskDisplayMetric,
19
+ TaskProfile,
20
+ TaskResult,
21
+ TaskScreen,
22
+ TaskSpec,
23
+ TaskWithResult,
24
+ )
25
+ from ..core.footer import task_http_retries_str
26
+ from ..core.results import task_metric, tasks_results
27
+
28
+
29
+ class LogDisplay(Display):
30
+ def __init__(self) -> None:
31
+ self.total_tasks: int = 0
32
+ self.tasks: list[TaskWithResult] = []
33
+ self.parallel = False
34
+
35
+ def print(self, message: str) -> None:
36
+ logging.info(message, stacklevel=2)
37
+
38
+ @contextlib.contextmanager
39
+ def progress(self, total: int) -> Iterator[Progress]:
40
+ yield LogProgress(total)
41
+
42
+ def run_task_app(self, main: Callable[[], Coroutine[None, None, TR]]) -> TR:
43
+ if running_in_notebook():
44
+ return run_coroutine(main())
45
+ else:
46
+ return anyio.run(main, backend=configured_async_backend())
47
+
48
+ @contextlib.contextmanager
49
+ def suspend_task_app(self) -> Iterator[None]:
50
+ yield
51
+
52
+ @contextlib.asynccontextmanager
53
+ async def task_screen(
54
+ self, tasks: list[TaskSpec], parallel: bool
55
+ ) -> AsyncIterator[TaskScreen]:
56
+ self.total_tasks = len(tasks)
57
+ self.tasks = []
58
+ self.parallel = parallel
59
+ try:
60
+ logging.info(f"Running {self.total_tasks} tasks...", stacklevel=3)
61
+ yield TaskScreen()
62
+ finally:
63
+ # Log final results
64
+ if self.tasks:
65
+ self._log_results()
66
+
67
+ @contextlib.contextmanager
68
+ def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
69
+ # Create and yield task display
70
+ task = TaskWithResult(profile, None)
71
+ self.tasks.append(task)
72
+ yield LogTaskDisplay(task)
73
+ self._log_status()
74
+
75
+ def display_counter(self, caption: str, value: str) -> None:
76
+ logging.info(f"{caption}: {value}", stacklevel=2)
77
+
78
+ def _log_status(self) -> None:
79
+ """Log status updates for all tasks"""
80
+ completed_tasks = sum(1 for task in self.tasks if task.result is not None)
81
+ total_tasks = len(self.tasks)
82
+ logging.info(f"{completed_tasks}/{total_tasks} tasks complete", stacklevel=4)
83
+
84
+ def _log_results(self) -> None:
85
+ """Log final results"""
86
+ results = tasks_results(self.tasks)
87
+ console = Console(width=120)
88
+ console.log(results, _stack_offset=4)
89
+
90
+
91
+ class LogProgress(Progress):
92
+ def __init__(self, total: int):
93
+ self.total = total
94
+ self.current = 0
95
+
96
+ def update(self, n: int = 1) -> None:
97
+ self.current += n
98
+
99
+ def complete(self) -> None:
100
+ self.current = self.total
101
+
102
+
103
+ class LogTaskDisplay(TaskDisplay):
104
+ def __init__(self, task: TaskWithResult):
105
+ self.task = task
106
+ self.progress_display: LogProgress | None = None
107
+ self.samples_complete = 0
108
+ self.samples_total = 0
109
+ self.current_metrics: list[TaskDisplayMetric] | None = None
110
+
111
+ @contextlib.contextmanager
112
+ def progress(self) -> Iterator[Progress]:
113
+ self.progress_display = LogProgress(self.task.profile.steps)
114
+ yield self.progress_display
115
+
116
+ @throttle(5)
117
+ def _log_status_throttled(self, stacklevel: int) -> None:
118
+ self._log_status(stacklevel=stacklevel + 2)
119
+
120
+ def _log_status(self, stacklevel: int) -> None:
121
+ """Log status updates"""
122
+ status_parts: list[str] = []
123
+
124
+ # Add task name and model
125
+ status_parts.append(f"Task: {self.task.profile.name}")
126
+ status_parts.append(f"Model: {self.task.profile.model}")
127
+
128
+ # Add step progress
129
+ if self.progress_display:
130
+ progress_percent = int(
131
+ self.progress_display.current / self.progress_display.total * 100
132
+ )
133
+ status_parts.append(
134
+ f"Steps: {self.progress_display.current}/{self.progress_display.total} {progress_percent}%"
135
+ )
136
+
137
+ # Add sample progress
138
+ status_parts.append(f"Samples: {self.samples_complete}/{self.samples_total}")
139
+
140
+ # Add metrics
141
+ if self.current_metrics:
142
+ metric_str = task_metric(self.current_metrics)
143
+ status_parts.append(metric_str)
144
+
145
+ # Add resource usage
146
+ resources_dict: dict[str, str] = {}
147
+ for model, resource in concurrency_status_display().items():
148
+ resources_dict[model] = f"{resource[0]}/{resource[1]}"
149
+ resources = ", ".join(
150
+ [f"{key}: {value}" for key, value in resources_dict.items()]
151
+ )
152
+ status_parts.append(resources)
153
+
154
+ # Add rate limits
155
+ rate_limits = task_http_retries_str()
156
+ if rate_limits:
157
+ status_parts.append(rate_limits)
158
+
159
+ # Print on new line
160
+ logging.info(", ".join(status_parts), stacklevel=stacklevel)
161
+
162
+ def sample_complete(self, complete: int, total: int) -> None:
163
+ self.samples_complete = complete
164
+ self.samples_total = total
165
+ self._log_status_throttled(stacklevel=3)
166
+
167
+ def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
168
+ self.current_metrics = metrics
169
+ self._log_status_throttled(stacklevel=3)
170
+
171
+ def complete(self, result: TaskResult) -> None:
172
+ self.task.result = result
173
+ self._log_status(stacklevel=3)
inspect_ai/_display/plain/display.py CHANGED
@@ -25,7 +25,7 @@ from ..core.display import (
25
25
  TaskWithResult,
26
26
  )
27
27
  from ..core.footer import task_http_retries_str
28
- from ..core.panel import task_panel, task_targets
28
+ from ..core.panel import task_panel
29
29
  from ..core.results import task_metric, tasks_results
30
30
 
31
31
 
@@ -79,7 +79,7 @@ class PlainDisplay(Display):
79
79
  profile=profile,
80
80
  show_model=True,
81
81
  body="", # Empty body since we haven't started yet
82
- subtitle=(task_config(profile), task_targets(profile)),
82
+ subtitle=task_config(profile),
83
83
  footer=None,
84
84
  log_location=None,
85
85
  )
inspect_ai/_display/rich/display.py CHANGED
@@ -32,7 +32,7 @@ from ..core.display import (
32
32
  TaskWithResult,
33
33
  )
34
34
  from ..core.footer import task_footer
35
- from ..core.panel import task_panel, task_targets, task_title, tasks_title
35
+ from ..core.panel import task_panel, task_title, tasks_title
36
36
  from ..core.progress import (
37
37
  RichProgress,
38
38
  progress_description,
@@ -311,15 +311,13 @@ def task_live_status(
311
311
 
312
312
  # the panel contents
313
313
  config = task_config(tasks[0].profile, style=theme.light)
314
- targets = task_targets(tasks[0].profile)
315
- subtitle = config, targets
316
314
 
317
315
  # the panel
318
316
  return task_panel(
319
317
  profile=tasks[0].profile,
320
318
  show_model=len(tasks) == 1,
321
319
  body=Group("", progress),
322
- subtitle=subtitle,
320
+ subtitle=config,
323
321
  footer=task_footer(counters, theme.light),
324
322
  log_location=None,
325
323
  )
inspect_ai/_display/textual/app.py CHANGED
@@ -42,7 +42,7 @@ from ..core.display import (
42
42
  TaskWithResult,
43
43
  )
44
44
  from ..core.footer import task_footer
45
- from ..core.panel import task_targets, task_title, tasks_title
45
+ from ..core.panel import task_title, tasks_title
46
46
  from ..core.rich import record_console_input, rich_initialise, rich_theme
47
47
  from .theme import inspect_dark, inspect_light
48
48
  from .widgets.console import ConsoleView
@@ -296,13 +296,8 @@ class TaskScreenApp(App[TR]):
296
296
  tasks.config = task_config(
297
297
  self._tasks[0].profile, generate_config=not self._parallel
298
298
  )
299
- if not self._parallel:
300
- tasks.targets = task_targets(self._tasks[0].profile)
301
- else:
302
- tasks.targets = " \n "
303
299
  else:
304
300
  tasks.config = ""
305
- tasks.targets = ""
306
301
 
307
302
  def update_samples(self) -> None:
308
303
  samples_view = self.query_one(SamplesView)
inspect_ai/_display/textual/widgets/task_detail.py CHANGED
@@ -30,6 +30,8 @@ class TaskDetail(Widget):
30
30
  width: 100%;
31
31
  height: auto;
32
32
  grid-gutter: 1 3;
33
+ grid-size-columns: 3;
34
+ grid-columns: 1fr 1fr 1fr;
33
35
  }
34
36
  """
35
37
 
@@ -92,20 +94,6 @@ class TaskDetail(Widget):
92
94
  if len(self.by_reducer) == 0:
93
95
  return
94
96
 
95
- # Compute the row and column count
96
- row_count = len(self.by_reducer)
97
- col_count = len(next(iter(self.by_reducer.values())))
98
-
99
- # If this can fit in a single row, make it fit
100
- # otherwise place each reducer on their own row
101
- self.grid.styles.grid_columns = "auto"
102
- if row_count * col_count < 4:
103
- self.grid.styles.grid_size_columns = row_count * col_count
104
- self.grid.styles.grid_size_rows = 1
105
- else:
106
- self.grid.styles.grid_size_columns = col_count
107
- self.grid.styles.grid_size_rows = row_count
108
-
109
97
  # In order to reduce flashing the below tracks use of widgets
110
98
  # and updates them when possible (removing and adding them as needed)
111
99
  # Makes keys for tracking Task Metric widgets
@@ -142,6 +130,7 @@ class TaskMetrics(Widget):
142
130
  TaskMetrics {
143
131
  width: auto;
144
132
  height: auto;
133
+ border: solid $foreground 20%;
145
134
  }
146
135
  TaskMetrics Grid {
147
136
  width: auto;
inspect_ai/_display/textual/widgets/tasks.py CHANGED
@@ -174,7 +174,7 @@ class TaskProgressView(Widget):
174
174
  color:$text-secondary;
175
175
  }
176
176
  #task-detail {
177
- column-span: 8;
177
+ column-span: 9;
178
178
  }
179
179
  .hidden {
180
180
  display: none;
inspect_ai/_eval/eval.py CHANGED
@@ -812,7 +812,7 @@ async def eval_retry_async(
812
812
  model_roles = model_roles_config_to_model_roles(eval_log.eval.model_roles)
813
813
 
814
814
  # collect the rest of the params we need for the eval
815
- task_args = eval_log.eval.task_args
815
+ task_args = eval_log.eval.task_args_passed
816
816
  tags = eval_log.eval.tags
817
817
  limit = eval_log.eval.config.limit
818
818
  sample_id = eval_log.eval.config.sample_id
inspect_ai/_eval/evalset.py CHANGED
@@ -114,7 +114,7 @@ def eval_set(
114
114
  (defaults to 30, resulting in waits of 30, 60, 120, 240, etc.). Wait time
115
115
  per-retry will in no case by longer than 1 hour.
116
116
  retry_connections: Reduce max_connections at this rate with each retry
117
- (defaults to 0.5)
117
+ (defaults to 1.0, which results in no reduction).
118
118
  retry_cleanup: Cleanup failed log files after retries
119
119
  (defaults to True)
120
120
  model: Model(s) for evaluation. If not specified use the value of the INSPECT_EVAL_MODEL
@@ -275,7 +275,7 @@ def eval_set(
275
275
  fs.mkdir(log_dir, exist_ok=True)
276
276
 
277
277
  # resolve some parameters
278
- retry_connections = retry_connections or 0.5
278
+ retry_connections = retry_connections or 1.0
279
279
  retry_cleanup = retry_cleanup is not False
280
280
  max_connections = starting_max_connections(models, GenerateConfig(**kwargs))
281
281
  max_tasks = max_tasks if max_tasks is not None else max(len(models), 4)
@@ -578,7 +578,7 @@ def task_identifier(task: ResolvedTask | EvalLog) -> str:
578
578
  else:
579
579
  task_file = task.eval.task_file or ""
580
580
  task_name = task.eval.task
581
- task_args = task.eval.task_args
581
+ task_args = task.eval.task_args_passed
582
582
  model = str(task.eval.model)
583
583
  model_roles = task.eval.model_roles or {}
584
584
 
inspect_ai/_eval/registry.py CHANGED
@@ -8,6 +8,7 @@ from inspect_ai._util.error import PrerequisiteError
8
8
  from inspect_ai._util.package import get_installed_package_name
9
9
  from inspect_ai._util.registry import (
10
10
  RegistryInfo,
11
+ extract_named_params,
11
12
  registry_add,
12
13
  registry_create,
13
14
  registry_info,
@@ -17,7 +18,7 @@ from inspect_ai._util.registry import (
17
18
  )
18
19
 
19
20
  from .task import Task
20
- from .task.constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
21
+ from .task.constants import TASK_ALL_PARAMS_ATTR, TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
21
22
 
22
23
  MODEL_PARAM = "model"
23
24
 
@@ -133,6 +134,10 @@ def task(*args: Any, name: str | None = None, **attribs: Any) -> Any:
133
134
  **w_kwargs,
134
135
  )
135
136
 
137
+ # extract all task parameters including defaults
138
+ named_params = extract_named_params(task_type, True, *w_args, **w_kwargs)
139
+ setattr(task_instance, TASK_ALL_PARAMS_ATTR, named_params)
140
+
136
141
  # if its not from an installed package then it is a "local"
137
142
  # module import, so set its task file and run dir
138
143
  if get_installed_package_name(task_type) is None:
inspect_ai/_eval/run.py CHANGED
@@ -3,6 +3,7 @@ import os
3
3
  import sys
4
4
  from typing import Any, Awaitable, Callable, Set, cast
5
5
 
6
+ from inspect_ai._eval.task.constants import TASK_ALL_PARAMS_ATTR
6
7
  from inspect_ai._eval.task.task import Task
7
8
  from inspect_ai._util.environ import environ_vars
8
9
  from inspect_ai._util.trace import trace_action
@@ -208,7 +209,10 @@ async def eval_run(
208
209
  metrics=eval_metrics,
209
210
  sandbox=resolved_task.sandbox,
210
211
  task_attribs=task.attribs,
211
- task_args=resolved_task.task_args,
212
+ task_args=getattr(
213
+ task, TASK_ALL_PARAMS_ATTR, resolved_task.task_args
214
+ ),
215
+ task_args_passed=resolved_task.task_args,
212
216
  model_args=resolved_task.model.model_args,
213
217
  eval_config=task_eval_config,
214
218
  metadata=((metadata or {}) | (task.metadata or {})) or None,
inspect_ai/_eval/task/constants.py CHANGED
@@ -1,2 +1,3 @@
1
1
  TASK_FILE_ATTR = "__task_file__"
2
2
  TASK_RUN_DIR_ATTR = "__task_run_dir__"
3
+ TASK_ALL_PARAMS_ATTR = "__task_all_params__"
inspect_ai/_eval/task/log.py CHANGED
@@ -71,6 +71,7 @@ class TaskLogger:
71
71
  sandbox: SandboxEnvironmentSpec | None,
72
72
  task_attribs: dict[str, Any],
73
73
  task_args: dict[str, Any],
74
+ task_args_passed: dict[str, Any],
74
75
  model_args: dict[str, Any],
75
76
  eval_config: EvalConfig,
76
77
  metadata: dict[str, Any] | None,
@@ -128,6 +129,7 @@ class TaskLogger:
128
129
  task_registry_name=task_registry_name,
129
130
  task_attribs=task_attribs,
130
131
  task_args=task_args,
132
+ task_args_passed=task_args_passed,
131
133
  solver=solver.solver if solver else None,
132
134
  tags=tags,
133
135
  solver_args=solver.args if solver else None,