inspect-ai: inspect_ai-0.3.58-py3-none-any.whl → inspect_ai-0.3.59-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127):
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -2
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +78 -11
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/score.py +1 -0
  13. inspect_ai/_eval/task/results.py +50 -22
  14. inspect_ai/_eval/task/run.py +41 -7
  15. inspect_ai/_eval/task/sandbox.py +10 -5
  16. inspect_ai/_util/constants.py +1 -0
  17. inspect_ai/_util/port_names.py +61 -0
  18. inspect_ai/_util/text.py +23 -0
  19. inspect_ai/_view/www/App.css +31 -1
  20. inspect_ai/_view/www/dist/assets/index.css +31 -1
  21. inspect_ai/_view/www/dist/assets/index.js +25344 -1849
  22. inspect_ai/_view/www/log-schema.json +32 -2
  23. inspect_ai/_view/www/package.json +2 -0
  24. inspect_ai/_view/www/src/App.mjs +8 -10
  25. inspect_ai/_view/www/src/Types.mjs +0 -1
  26. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  27. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  28. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  29. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  30. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  31. inspect_ai/_view/www/src/index.js +75 -2
  32. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  33. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  34. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  35. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  36. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  37. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  38. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
  39. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  40. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  41. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  42. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  43. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  44. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  45. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  46. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  47. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  48. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  49. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  50. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  51. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  52. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  53. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  54. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  55. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  56. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  57. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  58. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  59. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  60. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  61. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  62. inspect_ai/_view/www/vite.config.js +7 -0
  63. inspect_ai/_view/www/yarn.lock +116 -0
  64. inspect_ai/approval/_human/__init__.py +0 -0
  65. inspect_ai/approval/_policy.py +12 -6
  66. inspect_ai/log/_log.py +1 -1
  67. inspect_ai/log/_samples.py +16 -0
  68. inspect_ai/log/_transcript.py +4 -1
  69. inspect_ai/model/_call_tools.py +4 -0
  70. inspect_ai/model/_conversation.py +20 -8
  71. inspect_ai/model/_generate_config.py +10 -4
  72. inspect_ai/model/_model.py +117 -18
  73. inspect_ai/model/_model_output.py +7 -2
  74. inspect_ai/model/_providers/anthropic.py +100 -44
  75. inspect_ai/model/_providers/azureai.py +20 -20
  76. inspect_ai/model/_providers/bedrock.py +37 -40
  77. inspect_ai/model/_providers/google.py +46 -54
  78. inspect_ai/model/_providers/mistral.py +11 -11
  79. inspect_ai/model/_providers/openai.py +15 -16
  80. inspect_ai/model/_providers/openai_o1.py +9 -8
  81. inspect_ai/model/_providers/providers.py +1 -1
  82. inspect_ai/model/_providers/together.py +8 -8
  83. inspect_ai/model/_providers/vertex.py +1 -4
  84. inspect_ai/scorer/_reducer/reducer.py +1 -1
  85. inspect_ai/scorer/_scorer.py +2 -2
  86. inspect_ai/solver/__init__.py +2 -5
  87. inspect_ai/solver/_prompt.py +35 -5
  88. inspect_ai/solver/_task_state.py +80 -38
  89. inspect_ai/tool/__init__.py +2 -0
  90. inspect_ai/tool/_tool.py +12 -1
  91. inspect_ai/tool/_tool_call.py +10 -0
  92. inspect_ai/tool/_tool_def.py +16 -5
  93. inspect_ai/tool/_tool_with.py +21 -4
  94. inspect_ai/tool/beta/__init__.py +5 -0
  95. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  96. inspect_ai/tool/beta/_computer/_common.py +133 -0
  97. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  98. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  99. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  100. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  101. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  102. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  103. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  104. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  105. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  106. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  107. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  108. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  109. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  110. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  111. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  112. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  113. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  114. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  115. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  116. inspect_ai/util/__init__.py +2 -0
  117. inspect_ai/util/_limit.py +26 -0
  118. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  119. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  120. inspect_ai/util/_sandbox/environment.py +14 -0
  121. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  122. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
  123. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  124. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  125. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  126. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  127. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import { html } from "htm/preact";
2
- import { useCallback, useState } from "preact/hooks";
3
- import { useEffect, useMemo } from "preact/hooks";
2
+ import { useCallback, useMemo, useState } from "preact/hooks";
3
+ import { useEffect, useRef } from "preact/hooks";
4
4
 
5
5
  import { ApplicationStyles } from "../appearance/Styles.mjs";
6
6
  import { FontSize } from "../appearance/Fonts.mjs";
@@ -56,57 +56,28 @@ export const SampleList = (props) => {
56
56
  setHidden(false);
57
57
  }, [items]);
58
58
 
59
- const heightForType = (type) => {
60
- return type === "sample" ? kSampleHeight : kSeparatorHeight;
61
- };
62
-
63
- // Compute the row arrangement
64
- const rowMap = useMemo(() => {
65
- return items.reduce((values, current, index) => {
66
- const height = heightForType(current.type);
67
- const previous =
68
- values.length > 0 ? values[values.length - 1] : undefined;
69
- const start =
70
- previous === undefined ? 0 : previous.start + previous.height;
71
- values.push({
72
- index,
73
- height,
74
- start,
75
- });
76
- return values;
77
- }, []);
59
+ // Keep a mapping of the indexes to items (skipping separators)
60
+ const itemRowMapping = useMemo(() => {
61
+ const rowIndexes = [];
62
+ items.forEach((item, index) => {
63
+ if (item.type === "sample") {
64
+ rowIndexes.push(index);
65
+ }
66
+ });
67
+ return rowIndexes;
78
68
  }, [items]);
79
69
 
70
+ const prevSelectedIndexRef = useRef(null);
80
71
  useEffect(() => {
81
72
  const listEl = listRef.current;
82
73
  if (listEl) {
83
- // Decide if we need to scroll the element into position
84
- const selected = rowMap[selectedIndex];
85
- if (selected) {
86
- const itemTop = selected.start;
87
- const itemBottom = selected.start + selected.height;
88
-
89
- const scrollTop = listEl.base.scrollTop;
90
- const scrollBottom = scrollTop + listEl.base.offsetHeight;
91
-
92
- // It is visible
93
- if (itemTop >= scrollTop && itemBottom <= scrollBottom) {
94
- return;
95
- }
96
-
97
- if (itemTop < scrollTop) {
98
- // Top is scrolled off
99
- listEl.base.scrollTo({ top: itemTop });
100
- return;
101
- }
102
-
103
- if (itemBottom > scrollBottom) {
104
- listEl.base.scrollTo({ top: itemBottom - listEl.base.offsetHeight });
105
- return;
106
- }
107
- }
74
+ const actualRowIndex = itemRowMapping[selectedIndex];
75
+ const direction =
76
+ actualRowIndex > prevSelectedIndexRef.current ? "down" : "up";
77
+ listRef.current?.scrollToIndex(actualRowIndex, direction);
78
+ prevSelectedIndexRef.current = actualRowIndex;
108
79
  }
109
- }, [selectedIndex, rowMap, listRef]);
80
+ }, [selectedIndex, listRef, itemRowMapping]);
110
81
 
111
82
  /** @param {import("./SamplesTab.mjs").ListItem} item */
112
83
  const renderRow = (item) => {
@@ -254,7 +225,6 @@ export const SampleList = (props) => {
254
225
  tabIndex="0"
255
226
  renderRow=${renderRow}
256
227
  onkeydown=${onkeydown}
257
- rowMap=${rowMap}
258
228
  style=${listStyle}
259
229
  />
260
230
  ${footerRow}
@@ -1,6 +1,6 @@
1
1
  // @ts-check
2
2
  import { html } from "htm/preact";
3
- import { TranscriptView } from "./transcript/TranscriptView.mjs";
3
+ import { TranscriptVirtualList } from "./transcript/TranscriptView.mjs";
4
4
 
5
5
  /**
6
6
  * Renders the SampleTranscript component.
@@ -8,8 +8,13 @@ import { TranscriptView } from "./transcript/TranscriptView.mjs";
8
8
  * @param {Object} props - The parameters for the component.
9
9
  * @param {string} props.id - The id of this component
10
10
  * @param {import("../types/log").Events} props.evalEvents - The transcript to display.
11
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
11
12
  * @returns {import("preact").JSX.Element} The SampleTranscript component.
12
13
  */
13
- export const SampleTranscript = ({ id, evalEvents }) => {
14
- return html`<${TranscriptView} id=${id} events=${evalEvents} />`;
14
+ export const SampleTranscript = ({ id, evalEvents, scrollRef }) => {
15
+ return html`<${TranscriptVirtualList}
16
+ id=${id}
17
+ events=${evalEvents}
18
+ scrollRef=${scrollRef}
19
+ />`;
15
20
  };
@@ -83,6 +83,29 @@ import {
83
83
  * @property {number} normalized.limit - Normalized size of the limit message.
84
84
  */
85
85
 
86
+ /**
87
+ * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel
88
+ * @returns {string}
89
+ */
90
+ export const scoreLabelKey = (scoreLabel) => {
91
+ if (!scoreLabel) {
92
+ return "No score key";
93
+ }
94
+ return `${scoreLabel.scorer}.${scoreLabel.name}`;
95
+ };
96
+
97
+ /**
98
+ * @param {string} key
99
+ * @returns {import("../Types.mjs").ScoreLabel | undefined}
100
+ */
101
+ export const parseScoreLabelKey = (key) => {
102
+ if (key == "No score key") {
103
+ return undefined;
104
+ }
105
+ const [scorer, name] = key.split(".");
106
+ return { scorer, name };
107
+ };
108
+
86
109
  /**
87
110
  * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
88
111
  * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
@@ -165,17 +188,6 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
165
188
  return undefined;
166
189
  };
167
190
 
168
- /**
169
- * @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
170
- * @returns {string}
171
- */
172
- const scoreLabelKey = (scoreLabel) => {
173
- if (!scoreLabel) {
174
- return "No score key";
175
- }
176
- return `${scoreLabel.scorer}.${scoreLabel.name}`;
177
- };
178
-
179
191
  /**
180
192
  * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
181
193
  * @type {Map<string, ScoreDescriptor>}
@@ -466,7 +478,7 @@ const scoreCategorizers = [
466
478
  * @returns {ScoreDescriptor} a ScoreDescriptor
467
479
  */
468
480
  describe: (values, types) => {
469
- if (values.length === 2 && types.length === 1 && types[0] === "boolean") {
481
+ if (types.length === 1 && types[0] === "boolean") {
470
482
  return booleanScoreCategorizer();
471
483
  }
472
484
  },
@@ -31,6 +31,7 @@ import { EmptyPanel } from "../components/EmptyPanel.mjs";
31
31
  * @param {import("../Types.mjs").ScoreFilter} props.filter - the selected filter
32
32
  * @param {import("htm/preact").MutableRef<number>} props.sampleScrollPositionRef - the sample scroll position
33
33
  * @param {(position: number) => void} props.setSampleScrollPosition - sets the sample scroll position
34
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.sampleTabScrollRef - the sample scroll element
34
35
  * @param {any} props.sort - the selected sort
35
36
  *
36
37
  * @returns {import("preact").JSX.Element[]} The TranscriptView component.
@@ -54,6 +55,7 @@ export const SamplesTab = ({
54
55
  setSelectedSampleTab,
55
56
  sampleScrollPositionRef,
56
57
  setSampleScrollPosition,
58
+ sampleTabScrollRef,
57
59
  }) => {
58
60
  /** @type {[ListItem[], function(ListItem[]): void]} */
59
61
  const [items, setItems] = useState([]);
@@ -82,7 +84,7 @@ export const SamplesTab = ({
82
84
  setTimeout(() => {
83
85
  if (sampleListRef.current) {
84
86
  // @ts-ignore
85
- sampleListRef.current.base.focus();
87
+ sampleListRef.current.focus();
86
88
  }
87
89
  }, 0);
88
90
  }
@@ -152,6 +154,7 @@ export const SamplesTab = ({
152
154
  sampleDescriptor=${sampleDescriptor}
153
155
  selectedTab=${selectedSampleTab}
154
156
  setSelectedTab=${setSelectedSampleTab}
157
+ scrollRef=${sampleTabScrollRef}
155
158
  />`,
156
159
  );
157
160
  } else if (sampleMode === "many") {
@@ -23,6 +23,14 @@ export const SampleTools = (props) => {
23
23
  const hasEpochs = epochs > 1;
24
24
  const tools = [];
25
25
 
26
+ tools.push(
27
+ html`<${SampleFilter}
28
+ evalDescriptor=${sampleDescriptor.evalDescriptor}
29
+ filter=${filter}
30
+ filterChanged=${filterChanged}
31
+ />`,
32
+ );
33
+
26
34
  if (scores.length > 1) {
27
35
  tools.push(
28
36
  html`<${SelectScorer}
@@ -43,14 +51,6 @@ export const SampleTools = (props) => {
43
51
  );
44
52
  }
45
53
 
46
- tools.push(
47
- html`<${SampleFilter}
48
- filter=${filter}
49
- filterChanged=${filterChanged}
50
- descriptor=${sampleDescriptor}
51
- />`,
52
- );
53
-
54
54
  tools.push(
55
55
  html`<${SortFilter}
56
56
  sampleDescriptor=${sampleDescriptor}