inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +7 -3
  3. inspect_ai/_cli/eval.py +17 -2
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +4 -3
  6. inspect_ai/_display/core/config.py +3 -3
  7. inspect_ai/_display/core/panel.py +7 -3
  8. inspect_ai/_display/plain/__init__.py +0 -0
  9. inspect_ai/_display/plain/display.py +203 -0
  10. inspect_ai/_display/rich/display.py +4 -9
  11. inspect_ai/_display/textual/app.py +4 -1
  12. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  13. inspect_ai/_display/textual/widgets/samples.py +119 -16
  14. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  15. inspect_ai/_eval/eval.py +32 -20
  16. inspect_ai/_eval/evalset.py +7 -5
  17. inspect_ai/_eval/score.py +1 -0
  18. inspect_ai/_eval/task/__init__.py +2 -2
  19. inspect_ai/_eval/task/images.py +40 -25
  20. inspect_ai/_eval/task/results.py +50 -22
  21. inspect_ai/_eval/task/run.py +180 -124
  22. inspect_ai/_eval/task/sandbox.py +10 -5
  23. inspect_ai/_eval/task/task.py +140 -25
  24. inspect_ai/_util/constants.py +2 -0
  25. inspect_ai/_util/content.py +23 -1
  26. inspect_ai/_util/images.py +20 -17
  27. inspect_ai/_util/kvstore.py +73 -0
  28. inspect_ai/_util/notgiven.py +18 -0
  29. inspect_ai/_util/port_names.py +61 -0
  30. inspect_ai/_util/text.py +23 -0
  31. inspect_ai/_util/thread.py +5 -0
  32. inspect_ai/_view/www/App.css +31 -1
  33. inspect_ai/_view/www/dist/assets/index.css +31 -1
  34. inspect_ai/_view/www/dist/assets/index.js +25375 -1846
  35. inspect_ai/_view/www/log-schema.json +129 -15
  36. inspect_ai/_view/www/package.json +2 -0
  37. inspect_ai/_view/www/src/App.mjs +8 -10
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  40. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  41. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  42. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  43. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  44. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  45. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  46. inspect_ai/_view/www/src/index.js +75 -2
  47. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  48. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  49. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  50. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  51. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  52. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  53. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
  54. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  55. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  56. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +62 -27
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  76. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  77. inspect_ai/_view/www/vite.config.js +7 -0
  78. inspect_ai/_view/www/yarn.lock +116 -0
  79. inspect_ai/approval/_human/__init__.py +0 -0
  80. inspect_ai/approval/_human/util.py +2 -2
  81. inspect_ai/approval/_policy.py +12 -6
  82. inspect_ai/dataset/_sources/csv.py +2 -1
  83. inspect_ai/dataset/_sources/json.py +2 -1
  84. inspect_ai/dataset/_sources/util.py +15 -7
  85. inspect_ai/log/_condense.py +11 -1
  86. inspect_ai/log/_log.py +3 -6
  87. inspect_ai/log/_recorders/eval.py +19 -8
  88. inspect_ai/log/_samples.py +26 -5
  89. inspect_ai/log/_transcript.py +32 -2
  90. inspect_ai/model/__init__.py +10 -2
  91. inspect_ai/model/_call_tools.py +59 -12
  92. inspect_ai/model/_chat_message.py +2 -4
  93. inspect_ai/model/_conversation.py +61 -0
  94. inspect_ai/model/_generate_config.py +10 -4
  95. inspect_ai/model/_model.py +117 -18
  96. inspect_ai/model/_model_output.py +7 -2
  97. inspect_ai/model/_providers/anthropic.py +109 -51
  98. inspect_ai/model/_providers/azureai.py +26 -24
  99. inspect_ai/model/_providers/bedrock.py +43 -44
  100. inspect_ai/model/_providers/google.py +121 -58
  101. inspect_ai/model/_providers/groq.py +7 -5
  102. inspect_ai/model/_providers/hf.py +11 -6
  103. inspect_ai/model/_providers/mistral.py +17 -20
  104. inspect_ai/model/_providers/openai.py +32 -21
  105. inspect_ai/model/_providers/openai_o1.py +9 -8
  106. inspect_ai/model/_providers/providers.py +1 -1
  107. inspect_ai/model/_providers/together.py +8 -8
  108. inspect_ai/model/_providers/vertex.py +18 -8
  109. inspect_ai/scorer/__init__.py +13 -2
  110. inspect_ai/scorer/_metrics/__init__.py +2 -2
  111. inspect_ai/scorer/_metrics/std.py +3 -3
  112. inspect_ai/scorer/_reducer/reducer.py +1 -1
  113. inspect_ai/scorer/_scorer.py +2 -2
  114. inspect_ai/solver/__init__.py +2 -5
  115. inspect_ai/solver/_prompt.py +35 -5
  116. inspect_ai/solver/_task_state.py +80 -38
  117. inspect_ai/tool/__init__.py +11 -1
  118. inspect_ai/tool/_tool.py +21 -3
  119. inspect_ai/tool/_tool_call.py +10 -0
  120. inspect_ai/tool/_tool_def.py +16 -5
  121. inspect_ai/tool/_tool_with.py +21 -4
  122. inspect_ai/tool/beta/__init__.py +5 -0
  123. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  124. inspect_ai/tool/beta/_computer/_common.py +133 -0
  125. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  126. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  127. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  128. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  129. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  130. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  131. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  134. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  135. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  136. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  137. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  138. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  139. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  144. inspect_ai/util/__init__.py +2 -3
  145. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  146. inspect_ai/util/_display.py +14 -4
  147. inspect_ai/util/_limit.py +26 -0
  148. inspect_ai/util/_sandbox/context.py +12 -13
  149. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  150. inspect_ai/util/_sandbox/docker/docker.py +84 -14
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/environment.py +27 -1
  153. inspect_ai/util/_sandbox/local.py +1 -0
  154. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  155. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
  156. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  157. inspect_ai/model/_trace.py +0 -48
  158. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  159. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  160. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  161. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import { html } from "htm/preact";
2
- import { useCallback, useState } from "preact/hooks";
3
- import { useEffect, useMemo } from "preact/hooks";
2
+ import { useCallback, useMemo, useState } from "preact/hooks";
3
+ import { useEffect, useRef } from "preact/hooks";
4
4
 
5
5
  import { ApplicationStyles } from "../appearance/Styles.mjs";
6
6
  import { FontSize } from "../appearance/Fonts.mjs";
@@ -56,57 +56,28 @@ export const SampleList = (props) => {
56
56
  setHidden(false);
57
57
  }, [items]);
58
58
 
59
- const heightForType = (type) => {
60
- return type === "sample" ? kSampleHeight : kSeparatorHeight;
61
- };
62
-
63
- // Compute the row arrangement
64
- const rowMap = useMemo(() => {
65
- return items.reduce((values, current, index) => {
66
- const height = heightForType(current.type);
67
- const previous =
68
- values.length > 0 ? values[values.length - 1] : undefined;
69
- const start =
70
- previous === undefined ? 0 : previous.start + previous.height;
71
- values.push({
72
- index,
73
- height,
74
- start,
75
- });
76
- return values;
77
- }, []);
59
+ // Keep a mapping of the indexes to items (skipping separators)
60
+ const itemRowMapping = useMemo(() => {
61
+ const rowIndexes = [];
62
+ items.forEach((item, index) => {
63
+ if (item.type === "sample") {
64
+ rowIndexes.push(index);
65
+ }
66
+ });
67
+ return rowIndexes;
78
68
  }, [items]);
79
69
 
70
+ const prevSelectedIndexRef = useRef(null);
80
71
  useEffect(() => {
81
72
  const listEl = listRef.current;
82
73
  if (listEl) {
83
- // Decide if we need to scroll the element into position
84
- const selected = rowMap[selectedIndex];
85
- if (selected) {
86
- const itemTop = selected.start;
87
- const itemBottom = selected.start + selected.height;
88
-
89
- const scrollTop = listEl.base.scrollTop;
90
- const scrollBottom = scrollTop + listEl.base.offsetHeight;
91
-
92
- // It is visible
93
- if (itemTop >= scrollTop && itemBottom <= scrollBottom) {
94
- return;
95
- }
96
-
97
- if (itemTop < scrollTop) {
98
- // Top is scrolled off
99
- listEl.base.scrollTo({ top: itemTop });
100
- return;
101
- }
102
-
103
- if (itemBottom > scrollBottom) {
104
- listEl.base.scrollTo({ top: itemBottom - listEl.base.offsetHeight });
105
- return;
106
- }
107
- }
74
+ const actualRowIndex = itemRowMapping[selectedIndex];
75
+ const direction =
76
+ actualRowIndex > prevSelectedIndexRef.current ? "down" : "up";
77
+ listRef.current?.scrollToIndex(actualRowIndex, direction);
78
+ prevSelectedIndexRef.current = actualRowIndex;
108
79
  }
109
- }, [selectedIndex, rowMap, listRef]);
80
+ }, [selectedIndex, listRef, itemRowMapping]);
110
81
 
111
82
  /** @param {import("./SamplesTab.mjs").ListItem} item */
112
83
  const renderRow = (item) => {
@@ -254,7 +225,6 @@ export const SampleList = (props) => {
254
225
  tabIndex="0"
255
226
  renderRow=${renderRow}
256
227
  onkeydown=${onkeydown}
257
- rowMap=${rowMap}
258
228
  style=${listStyle}
259
229
  />
260
230
  ${footerRow}
@@ -1,6 +1,6 @@
1
1
  // @ts-check
2
2
  import { html } from "htm/preact";
3
- import { TranscriptView } from "./transcript/TranscriptView.mjs";
3
+ import { TranscriptVirtualList } from "./transcript/TranscriptView.mjs";
4
4
 
5
5
  /**
6
6
  * Renders the SampleTranscript component.
@@ -8,8 +8,13 @@ import { TranscriptView } from "./transcript/TranscriptView.mjs";
8
8
  * @param {Object} props - The parameters for the component.
9
9
  * @param {string} props.id - The id of this component
10
10
  * @param {import("../types/log").Events} props.evalEvents - The transcript to display.
11
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
11
12
  * @returns {import("preact").JSX.Element} The SampleTranscript component.
12
13
  */
13
- export const SampleTranscript = ({ id, evalEvents }) => {
14
- return html`<${TranscriptView} id=${id} events=${evalEvents} />`;
14
+ export const SampleTranscript = ({ id, evalEvents, scrollRef }) => {
15
+ return html`<${TranscriptVirtualList}
16
+ id=${id}
17
+ events=${evalEvents}
18
+ scrollRef=${scrollRef}
19
+ />`;
15
20
  };
@@ -83,6 +83,29 @@ import {
83
83
  * @property {number} normalized.limit - Normalized size of the limit message.
84
84
  */
85
85
 
86
+ /**
87
+ * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel
88
+ * @returns {string}
89
+ */
90
+ export const scoreLabelKey = (scoreLabel) => {
91
+ if (!scoreLabel) {
92
+ return "No score key";
93
+ }
94
+ return `${scoreLabel.scorer}.${scoreLabel.name}`;
95
+ };
96
+
97
+ /**
98
+ * @param {string} key
99
+ * @returns {import("../Types.mjs").ScoreLabel | undefined}
100
+ */
101
+ export const parseScoreLabelKey = (key) => {
102
+ if (key == "No score key") {
103
+ return undefined;
104
+ }
105
+ const [scorer, name] = key.split(".");
106
+ return { scorer, name };
107
+ };
108
+
86
109
  /**
87
110
  * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
88
111
  * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
@@ -165,17 +188,6 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
165
188
  return undefined;
166
189
  };
167
190
 
168
- /**
169
- * @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
170
- * @returns {string}
171
- */
172
- const scoreLabelKey = (scoreLabel) => {
173
- if (!scoreLabel) {
174
- return "No score key";
175
- }
176
- return `${scoreLabel.scorer}.${scoreLabel.name}`;
177
- };
178
-
179
191
  /**
180
192
  * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
181
193
  * @type {Map<string, ScoreDescriptor>}
@@ -377,7 +389,11 @@ export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
377
389
  (previous, current) => {
378
390
  const text = inputString(current.input).join(" ");
379
391
  const scoreValue = evalDescriptor.score(current, selectedScore).value;
380
- const scoreText = scoreValue ? String(scoreValue) : "";
392
+ const scoreText = scoreValue
393
+ ? String(scoreValue)
394
+ : current.error
395
+ ? String(current.error)
396
+ : "";
381
397
  previous[0] = Math.min(Math.max(previous[0], text.length), 300);
382
398
  previous[1] = Math.min(
383
399
  Math.max(previous[1], arrayToString(current.target).length),
@@ -462,7 +478,7 @@ const scoreCategorizers = [
462
478
  * @returns {ScoreDescriptor} a ScoreDescriptor
463
479
  */
464
480
  describe: (values, types) => {
465
- if (values.length === 2 && types.length === 1 && types[0] === "boolean") {
481
+ if (types.length === 1 && types[0] === "boolean") {
466
482
  return booleanScoreCategorizer();
467
483
  }
468
484
  },
@@ -31,6 +31,7 @@ import { EmptyPanel } from "../components/EmptyPanel.mjs";
31
31
  * @param {import("../Types.mjs").ScoreFilter} props.filter - the selected filter
32
32
  * @param {import("htm/preact").MutableRef<number>} props.sampleScrollPositionRef - the sample scroll position
33
33
  * @param {(position: number) => void} props.setSampleScrollPosition - sets the sample scroll position
34
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.sampleTabScrollRef - the sample scroll element
34
35
  * @param {any} props.sort - the selected sort
35
36
  *
36
37
  * @returns {import("preact").JSX.Element[]} The TranscriptView component.
@@ -54,6 +55,7 @@ export const SamplesTab = ({
54
55
  setSelectedSampleTab,
55
56
  sampleScrollPositionRef,
56
57
  setSampleScrollPosition,
58
+ sampleTabScrollRef,
57
59
  }) => {
58
60
  /** @type {[ListItem[], function(ListItem[]): void]} */
59
61
  const [items, setItems] = useState([]);
@@ -82,7 +84,7 @@ export const SamplesTab = ({
82
84
  setTimeout(() => {
83
85
  if (sampleListRef.current) {
84
86
  // @ts-ignore
85
- sampleListRef.current.base.focus();
87
+ sampleListRef.current.focus();
86
88
  }
87
89
  }, 0);
88
90
  }
@@ -152,6 +154,7 @@ export const SamplesTab = ({
152
154
  sampleDescriptor=${sampleDescriptor}
153
155
  selectedTab=${selectedSampleTab}
154
156
  setSelectedTab=${setSelectedSampleTab}
157
+ scrollRef=${sampleTabScrollRef}
155
158
  />`,
156
159
  );
157
160
  } else if (sampleMode === "many") {
@@ -23,6 +23,14 @@ export const SampleTools = (props) => {
23
23
  const hasEpochs = epochs > 1;
24
24
  const tools = [];
25
25
 
26
+ tools.push(
27
+ html`<${SampleFilter}
28
+ evalDescriptor=${sampleDescriptor.evalDescriptor}
29
+ filter=${filter}
30
+ filterChanged=${filterChanged}
31
+ />`,
32
+ );
33
+
26
34
  if (scores.length > 1) {
27
35
  tools.push(
28
36
  html`<${SelectScorer}
@@ -43,14 +51,6 @@ export const SampleTools = (props) => {
43
51
  );
44
52
  }
45
53
 
46
- tools.push(
47
- html`<${SampleFilter}
48
- filter=${filter}
49
- filterChanged=${filterChanged}
50
- descriptor=${sampleDescriptor}
51
- />`,
52
- );
53
-
54
54
  tools.push(
55
55
  html`<${SortFilter}
56
56
  sampleDescriptor=${sampleDescriptor}