inspect-ai 0.3.96__py3-none-any.whl → 0.3.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. inspect_ai/_eval/eval.py +10 -2
  2. inspect_ai/_eval/run.py +6 -1
  3. inspect_ai/_eval/task/util.py +32 -3
  4. inspect_ai/_util/registry.py +7 -0
  5. inspect_ai/_util/timer.py +13 -0
  6. inspect_ai/_view/www/dist/assets/index.css +275 -195
  7. inspect_ai/_view/www/dist/assets/index.js +8568 -7376
  8. inspect_ai/_view/www/src/app/App.css +1 -0
  9. inspect_ai/_view/www/src/app/App.tsx +27 -10
  10. inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
  11. inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
  12. inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
  13. inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
  14. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
  15. inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
  16. inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
  17. inspect_ai/_view/www/src/app/content/types.ts +5 -0
  18. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
  19. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
  20. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
  21. inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
  22. inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
  23. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
  24. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
  25. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
  26. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
  27. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
  28. inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
  29. inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
  30. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
  31. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
  32. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
  33. inspect_ai/_view/www/src/app/routing/url.ts +45 -0
  34. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
  35. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
  36. inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
  37. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
  38. inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
  39. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
  40. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
  41. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
  42. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
  43. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
  44. inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
  47. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
  48. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
  49. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
  50. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
  51. inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
  52. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
  53. inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
  54. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
  55. inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
  56. inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
  57. inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
  58. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
  59. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
  60. inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
  61. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
  63. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
  64. inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
  65. inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
  66. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
  67. inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
  69. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
  70. inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
  71. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
  72. inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
  73. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
  74. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
  75. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
  76. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
  77. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
  78. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
  79. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
  80. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
  81. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
  82. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
  83. inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
  84. inspect_ai/_view/www/src/app/types.ts +12 -1
  85. inspect_ai/_view/www/src/components/Card.css +6 -3
  86. inspect_ai/_view/www/src/components/Card.tsx +15 -2
  87. inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
  88. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
  89. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
  90. inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
  91. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
  92. inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
  93. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
  94. inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
  95. inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
  96. inspect_ai/_view/www/src/state/hooks.ts +83 -13
  97. inspect_ai/_view/www/src/state/logPolling.ts +2 -2
  98. inspect_ai/_view/www/src/state/logSlice.ts +1 -2
  99. inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
  100. inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
  101. inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
  102. inspect_ai/_view/www/src/state/scoring.ts +1 -1
  103. inspect_ai/_view/www/src/state/scrolling.ts +39 -6
  104. inspect_ai/_view/www/src/state/store.ts +5 -0
  105. inspect_ai/_view/www/src/state/store_filter.ts +47 -44
  106. inspect_ai/_view/www/src/utils/debugging.ts +95 -0
  107. inspect_ai/_view/www/src/utils/format.ts +2 -2
  108. inspect_ai/_view/www/src/utils/json.ts +29 -0
  109. inspect_ai/agent/__init__.py +2 -1
  110. inspect_ai/agent/_agent.py +12 -0
  111. inspect_ai/agent/_react.py +184 -48
  112. inspect_ai/agent/_types.py +14 -1
  113. inspect_ai/analysis/beta/__init__.py +0 -2
  114. inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
  115. inspect_ai/analysis/beta/_dataframe/evals/table.py +65 -40
  116. inspect_ai/analysis/beta/_dataframe/events/table.py +24 -36
  117. inspect_ai/analysis/beta/_dataframe/messages/table.py +24 -15
  118. inspect_ai/analysis/beta/_dataframe/progress.py +35 -5
  119. inspect_ai/analysis/beta/_dataframe/record.py +13 -9
  120. inspect_ai/analysis/beta/_dataframe/samples/columns.py +1 -1
  121. inspect_ai/analysis/beta/_dataframe/samples/table.py +156 -46
  122. inspect_ai/analysis/beta/_dataframe/util.py +14 -12
  123. inspect_ai/dataset/_dataset.py +0 -1
  124. inspect_ai/model/_call_tools.py +1 -1
  125. inspect_ai/model/_providers/anthropic.py +18 -5
  126. inspect_ai/model/_providers/azureai.py +7 -2
  127. inspect_ai/model/_providers/google.py +6 -0
  128. inspect_ai/model/_providers/util/llama31.py +3 -3
  129. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/METADATA +2 -2
  130. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/RECORD +134 -129
  131. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/WHEEL +1 -1
  132. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
  133. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
  134. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/entry_points.txt +0 -0
  135. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/licenses/LICENSE +0 -0
  136. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/eval.py CHANGED
@@ -28,7 +28,7 @@ from inspect_ai._util.error import PrerequisiteError
28
28
  from inspect_ai._util.file import absolute_file_path
29
29
  from inspect_ai._util.logger import warn_once
30
30
  from inspect_ai._util.platform import platform_init
31
- from inspect_ai._util.registry import registry_lookup
31
+ from inspect_ai._util.registry import registry_lookup, registry_package_name
32
32
  from inspect_ai.approval._apply import init_tool_approval
33
33
  from inspect_ai.approval._policy import (
34
34
  ApprovalPolicy,
@@ -770,7 +770,15 @@ async def eval_retry_async(
770
770
  task = f"{task_file}@{task_name}"
771
771
  else:
772
772
  if registry_lookup("task", task_name) is None:
773
- raise FileNotFoundError(f"Task '{task_name}' not found.")
773
+ # if this object is in a package then let the user know
774
+ # that they need to register it to work with eval-retry
775
+ package_name = registry_package_name(task_name)
776
+ if package_name is not None:
777
+ raise FileNotFoundError(
778
+ f"Task '{task_name}' is located in package '{package_name}' but has not been registered so cannot be retried. See https://inspect.aisi.org.uk/tasks.html#packaging for additional details on registering tasks in packages."
779
+ )
780
+ else:
781
+ raise FileNotFoundError(f"Task '{task_name}' not found.")
774
782
  task = task_name
775
783
 
776
784
  # see if there is solver spec in the eval log
inspect_ai/_eval/run.py CHANGED
@@ -475,7 +475,12 @@ async def startup_sandbox_environments(
475
475
  sandboxenvs: Set[TaskSandboxEnvironment] = set()
476
476
  for task in tasks:
477
477
  # resolve each sample and add to sandboxenvs
478
- dataset = slice_dataset(task.task.dataset, config.limit, config.sample_id)
478
+ resolved_task_sample_ids = resolve_task_sample_ids(
479
+ task.task.name, config.sample_id
480
+ )
481
+ dataset = slice_dataset(
482
+ task.task.dataset, config.limit, resolved_task_sample_ids
483
+ )
479
484
  for sample in dataset:
480
485
  sandbox = await resolve_sandbox_for_task_and_sample(
481
486
  eval_sandbox, task.task, sample
inspect_ai/_eval/task/util.py CHANGED
@@ -1,7 +1,11 @@
1
1
  import os
2
+ import reprlib
2
3
  from copy import deepcopy
4
+ from logging import getLogger
3
5
  from typing import cast
4
6
 
7
+ from inspect_ai._util.error import PrerequisiteError
8
+ from inspect_ai._util.logger import warn_once
5
9
  from inspect_ai._util.path import cwd_relative_path
6
10
  from inspect_ai.dataset import Sample
7
11
  from inspect_ai.dataset._dataset import Dataset
@@ -10,6 +14,8 @@ from inspect_ai.model import ChatMessage, ChatMessageUser
10
14
  from ..task import Task
11
15
  from .constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
12
16
 
17
+ logger = getLogger(__name__)
18
+
13
19
 
14
20
  def sample_messages(sample: Sample) -> list[ChatMessage]:
15
21
  if isinstance(sample.input, str):
@@ -47,9 +53,32 @@ def slice_dataset(
47
53
  return id if isinstance(id, str) else str(id).zfill(20)
48
54
 
49
55
  if sample_id is not None:
50
- sample_id = sample_id if isinstance(sample_id, list) else [sample_id]
51
- sample_id = [normalise(id) for id in sample_id]
52
- return dataset.filter(lambda sample: normalise(sample.id) in sample_id)
56
+ # reduce to list of normalized sample ids
57
+ sample_ids = sample_id if isinstance(sample_id, list) else [sample_id]
58
+ sample_id = [normalise(id) for id in sample_ids]
59
+
60
+ # validate all the sample ids and warn if they aren't in the dataset
61
+ all_sample_ids_raw = [sample.id for sample in dataset]
62
+ all_sample_ids = [normalise(id) for id in all_sample_ids_raw]
63
+ for id in sample_id:
64
+ if id not in all_sample_ids:
65
+ warn_once(
66
+ logger, f"sample id '{id}' not found in dataset '{dataset.name}'."
67
+ )
68
+
69
+ # filter the dataset
70
+ filtered = dataset.filter(lambda sample: normalise(sample.id) in sample_id)
71
+
72
+ # raise error if we got no hits
73
+ if len(filtered) == 0:
74
+ filter = ",".join([str(id) for id in sample_id])
75
+ r = reprlib.Repr()
76
+ r.maxlist = 8
77
+ raise PrerequisiteError(
78
+ f"No matches in dataset '{dataset.name}' for sample_id filter '{filter}'\n({dataset.name} ids: {r.repr(all_sample_ids_raw)})"
79
+ )
80
+
81
+ return filtered
53
82
  else:
54
83
  dataset_limit = (
55
84
  slice(0, len(dataset))
inspect_ai/_util/registry.py CHANGED
@@ -183,6 +183,13 @@ def registry_lookup(type: RegistryType, name: str) -> object | None:
183
183
  return o
184
184
 
185
185
 
186
+ def registry_package_name(name: str) -> str | None:
187
+ if name.find("/") != -1 and name.find(".") == -1:
188
+ return name.split("/")[0]
189
+ else:
190
+ return None
191
+
192
+
186
193
  def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
187
194
  r"""Find objects in the registry that match the passed predicate.
188
195
 
inspect_ai/_util/timer.py ADDED
@@ -0,0 +1,13 @@
1
+ import time
2
+ from contextlib import contextmanager
3
+ from typing import Iterator
4
+
5
+
6
+ @contextmanager
7
+ def execution_timer(name: str | None = None) -> Iterator[None]:
8
+ start_time = time.perf_counter()
9
+ yield
10
+ end_time = time.perf_counter()
11
+ print(
12
+ f"{name if name else ''} execution time: {end_time - start_time:.6f} seconds".strip()
13
+ )