inspect-ai 0.3.56__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. inspect_ai/_display/core/panel.py +1 -1
  2. inspect_ai/_eval/run.py +16 -11
  3. inspect_ai/_util/datetime.py +1 -1
  4. inspect_ai/_util/deprecation.py +1 -1
  5. inspect_ai/_util/json.py +11 -1
  6. inspect_ai/_util/logger.py +2 -1
  7. inspect_ai/_util/trace.py +39 -3
  8. inspect_ai/_util/transcript.py +36 -7
  9. inspect_ai/_view/www/.prettierrc.js +12 -0
  10. inspect_ai/_view/www/dist/assets/index.js +286 -224
  11. inspect_ai/_view/www/log-schema.json +124 -125
  12. inspect_ai/_view/www/src/App.mjs +18 -9
  13. inspect_ai/_view/www/src/Types.mjs +0 -1
  14. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  15. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  16. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  17. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  18. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  19. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  20. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  21. inspect_ai/_view/www/src/components/Tools.mjs +18 -3
  22. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  23. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  24. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  25. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  26. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  27. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  28. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  29. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  30. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  31. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  32. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  33. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  34. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  35. inspect_ai/_view/www/src/types/log.d.ts +2 -8
  36. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  37. inspect_ai/log/_log.py +25 -0
  38. inspect_ai/log/_recorders/eval.py +2 -0
  39. inspect_ai/model/_call_tools.py +27 -5
  40. inspect_ai/model/_providers/google.py +24 -6
  41. inspect_ai/model/_providers/openai.py +17 -3
  42. inspect_ai/model/_providers/openai_o1.py +10 -12
  43. inspect_ai/tool/_tool_info.py +2 -1
  44. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  45. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
  46. inspect_ai/util/__init__.py +4 -0
  47. inspect_ai/util/_sandbox/docker/compose.py +1 -3
  48. inspect_ai/util/_sandbox/docker/util.py +2 -1
  49. inspect_ai/util/_sandbox/self_check.py +18 -18
  50. inspect_ai/util/_store.py +2 -2
  51. inspect_ai/util/_subprocess.py +3 -3
  52. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  53. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +57 -56
  54. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  55. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  56. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  57. {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -48,11 +48,13 @@ export const Navbar = ({
48
48
  if (status === "success") {
49
49
  statusPanel = html`<${ResultsPanel} results="${results}" />`;
50
50
  } else if (status === "cancelled") {
51
- statusPanel = html`<${CanceledPanel}
51
+ statusPanel = html`<${CancelledPanel}
52
52
  sampleCount=${samples?.length || 0}
53
53
  />`;
54
54
  } else if (status === "started") {
55
- statusPanel = html`<${RunningPanel} />`;
55
+ statusPanel = html`<${RunningPanel} sampleCount=${samples?.length || 0} />`;
56
+ } else if (status === "error") {
57
+ statusPanel = html`<${ErroredPanel} sampleCount=${samples?.length || 0} />`;
56
58
  }
57
59
 
58
60
  // If no logfile is loaded, just show an empty navbar
@@ -188,48 +190,54 @@ export const Navbar = ({
188
190
  `;
189
191
  };
190
192
 
191
- const CanceledPanel = ({ sampleCount }) => {
193
+ const StatusPanel = ({ icon, status, sampleCount }) => {
192
194
  return html`<div
193
195
  style=${{
194
196
  padding: "1em",
195
197
  marginTop: "0.5em",
196
198
  textTransform: "uppercase",
197
199
  fontSize: FontSize.smaller,
200
+ display: "grid",
201
+ gridTemplateColumns: "auto auto",
198
202
  }}
199
203
  >
200
204
  <i
201
- class="${ApplicationIcons.logging.info}"
202
- style=${{ fontSize: FontSize.large, marginRight: "0.3em" }}
205
+ class="${icon}"
206
+ style=${{
207
+ fontSize: FontSize.large,
208
+ marginRight: "0.3em",
209
+ marginTop: "-0.1em",
210
+ }}
203
211
  />
204
- cancelled (${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})
212
+ <div>
213
+ <div>${status}</div>
214
+ <div>(${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})</div>
215
+ </div>
205
216
  </div>`;
206
217
  };
207
218
 
208
- const RunningPanel = () => {
209
- return html`
210
- <div
211
- style=${{
212
- marginTop: "0.5em",
213
- display: "inline-grid",
214
- gridTemplateColumns: "max-content max-content",
215
- }}
216
- >
217
- <div>
218
- <i class=${ApplicationIcons.running} />
219
- </div>
220
- <div
221
- style=${{
222
- marginLeft: "0.3em",
223
- paddingTop: "0.2em",
224
- fontSize: FontSize.smaller,
225
- ...TextStyle.label,
226
- ...TextStyle.secondary,
227
- }}
228
- >
229
- Running
230
- </div>
231
- </div>
232
- `;
219
+ const CancelledPanel = ({ sampleCount }) => {
220
+ return html`<${StatusPanel}
221
+ icon=${ApplicationIcons.logging.info}
222
+ status="Cancelled"
223
+ sampleCount=${sampleCount}
224
+ />`;
225
+ };
226
+
227
+ const ErroredPanel = ({ sampleCount }) => {
228
+ return html`<${StatusPanel}
229
+ icon=${ApplicationIcons.logging.error}
230
+ status="Task Failed"
231
+ sampleCount=${sampleCount}
232
+ />`;
233
+ };
234
+
235
+ const RunningPanel = ({ sampleCount }) => {
236
+ return html`<${StatusPanel}
237
+ icon=${ApplicationIcons.running}
238
+ status="Running"
239
+ sampleCount=${sampleCount}
240
+ />`;
233
241
  };
234
242
 
235
243
  const ResultsPanel = ({ results }) => {
@@ -298,6 +306,7 @@ const ResultsPanel = ({ results }) => {
298
306
  * @returns {import("preact").JSX.Element} The TranscriptView component.
299
307
  */
300
308
  const VerticalMetric = ({ metric, isFirst }) => {
309
+ // @ts-expect-error
301
310
  const reducer_component = metric.reducer
302
311
  ? html` <div
303
312
  style=${{
@@ -309,7 +318,10 @@ const VerticalMetric = ({ metric, isFirst }) => {
309
318
  ...TextStyle.secondary,
310
319
  }}
311
320
  >
312
- ${metric.reducer}
321
+ ${
322
+ // @ts-expect-error
323
+ metric.reducer
324
+ }
313
325
  </div>`
314
326
  : "";
315
327
 
@@ -422,8 +422,7 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
422
422
 
423
423
  const fullAnswer =
424
424
  sample && sampleDescriptor
425
- ? // @ts-ignore
426
- sampleDescriptor.selectedScorer(sample).answer()
425
+ ? sampleDescriptor.selectedScorerDescriptor(sample).answer()
427
426
  : undefined;
428
427
  if (fullAnswer) {
429
428
  columns.push({
@@ -17,7 +17,22 @@ import { inputString } from "../utils/Format.mjs";
17
17
  const kSampleHeight = 88;
18
18
  const kSeparatorHeight = 24;
19
19
 
20
- // Convert samples to a datastructure which contemplates grouping, etc...
20
+ /**
21
+ * Convert samples to a datastructure which contemplates grouping, etc...
22
+ *
23
+ * @param {Object} props - The parameters for the component.
24
+ * @param {Object} props.listRef - The ref for the list.
25
+ * @param {import("./SamplesTab.mjs").ListItem[]} props.items - The samples.
26
+ * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
27
+ * @param {Object} props.style - The style for the element
28
+ * @param {number} props.selectedIndex - The index of the selected sample.
29
+ * @param {(index: number) => void} props.setSelectedIndex - The function to set the selected sample index.
30
+ * @param {import("../Types.mjs").ScoreLabel} props.selectedScore - The function to get the selected score.
31
+ * @param {() => void} props.nextSample - The function to move to the next sample.
32
+ * @param {() => void} props.prevSample - The function to move to the previous sample.
33
+ * @param {(index: number) => void} props.showSample - The function to show the sample.
34
+ * @returns {import("preact").JSX.Element} The SampleList component.
35
+ */
21
36
  export const SampleList = (props) => {
22
37
  const {
23
38
  listRef,
@@ -93,6 +108,7 @@ export const SampleList = (props) => {
93
108
  }
94
109
  }, [selectedIndex, rowMap, listRef]);
95
110
 
111
+ /** @param {import("./SamplesTab.mjs").ListItem} item */
96
112
  const renderRow = (item) => {
97
113
  if (item.type === "sample") {
98
114
  return html`
@@ -145,7 +161,7 @@ export const SampleList = (props) => {
145
161
  );
146
162
 
147
163
  const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
148
- const { limit, answer, target } = gridColumns(sampleDescriptor);
164
+ const { input, limit, answer, target } = gridColumns(sampleDescriptor);
149
165
 
150
166
  const headerRow = html`<div
151
167
  style=${{
@@ -160,7 +176,7 @@ export const SampleList = (props) => {
160
176
  }}
161
177
  >
162
178
  <div>Id</div>
163
- <div>Input</div>
179
+ <div>${input !== "0" ? "Input" : ""}</div>
164
180
  <div>${target !== "0" ? "Target" : ""}</div>
165
181
  <div>${answer !== "0" ? "Answer" : ""}</div>
166
182
  <div>${limit !== "0" ? "Limit" : ""}</div>
@@ -192,6 +208,7 @@ export const SampleList = (props) => {
192
208
  // Count any sample errors and display a bad alerting the user
193
209
  // to any errors
194
210
  const errorCount = items?.reduce((previous, item) => {
211
+ // @ts-expect-error
195
212
  if (item.data.error) {
196
213
  return previous + 1;
197
214
  } else {
@@ -201,6 +218,7 @@ export const SampleList = (props) => {
201
218
 
202
219
  // Count limits
203
220
  const limitCount = items?.reduce((previous, item) => {
221
+ // @ts-expect-error
204
222
  if (item.data.limit) {
205
223
  return previous + 1;
206
224
  } else {
@@ -260,6 +278,17 @@ const SeparatorRow = ({ id, title, height }) => {
260
278
  </div>`;
261
279
  };
262
280
 
281
+ /**
282
+ * @param {Object} props - The parameters for the component.
283
+ * @param {string} props.id - The unique identifier for the sample.
284
+ * @param {number} props.index - The index of the sample.
285
+ * @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample.
286
+ * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
287
+ * @param {number} props.height - The height of the sample row.
288
+ * @param {boolean} props.selected - Whether the sample is selected.
289
+ * @param {(index: number) => void} props.showSample - The function to show the sample.
290
+ * @returns {import("preact").JSX.Element} The SampleRow component.
291
+ */
263
292
  const SampleRow = ({
264
293
  id,
265
294
  index,
@@ -339,7 +368,9 @@ const SampleRow = ({
339
368
  ${sample
340
369
  ? html`
341
370
  <${MarkdownDiv}
342
- markdown=${sampleDescriptor?.selectedScorer(sample).answer()}
371
+ markdown=${sampleDescriptor
372
+ ?.selectedScorerDescriptor(sample)
373
+ .answer()}
343
374
  style=${{ paddingLeft: "0" }}
344
375
  class="no-last-para-padding"
345
376
  />
@@ -14,6 +14,14 @@ const labelStyle = {
14
14
  ...TextStyle.secondary,
15
15
  };
16
16
 
17
+ /**
18
+ * @param {Object} props - The component props.
19
+ * @param {import("../types/log").EvalSample} props.sample - The sample.
20
+ * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
21
+ * @param {Object} props.style - The style for the element.
22
+ * @param {string} props.scorer - The scorer.
23
+ * @returns {import("preact").JSX.Element} The SampleScoreView component.
24
+ */
17
25
  export const SampleScoreView = ({
18
26
  sample,
19
27
  sampleDescriptor,
@@ -21,7 +29,7 @@ export const SampleScoreView = ({
21
29
  scorer,
22
30
  }) => {
23
31
  if (!sampleDescriptor) {
24
- return "";
32
+ return html``;
25
33
  }
26
34
 
27
35
  const scoreInput = inputString(sample.input);
@@ -34,7 +42,10 @@ export const SampleScoreView = ({
34
42
  );
35
43
  }
36
44
 
37
- const scorerDescriptor = sampleDescriptor.scorer(sample, scorer);
45
+ const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor(
46
+ sample,
47
+ { scorer, name: scorer },
48
+ );
38
49
  const explanation = scorerDescriptor.explanation() || "(No Explanation)";
39
50
  const answer = scorerDescriptor.answer();
40
51
  const metadata = scorerDescriptor.metadata();
@@ -1,9 +1,18 @@
1
1
  import { html } from "htm/preact";
2
2
 
3
+ /**
4
+ * @param {Object} props
5
+ * @param {import("../api/Types.mjs").SampleSummary} props.sample
6
+ * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
7
+ * @param {string} props.scorer
8
+ * @returns {import("preact").JSX.Element}
9
+ */
3
10
  export const SampleScores = ({ sample, sampleDescriptor, scorer }) => {
4
11
  const scores = scorer
5
- ? sampleDescriptor.scorer(sample, scorer).scores()
6
- : sampleDescriptor.selectedScorer(sample).scores();
12
+ ? sampleDescriptor.evalDescriptor
13
+ .scorerDescriptor(sample, { scorer, name: scorer })
14
+ .scores()
15
+ : sampleDescriptor.selectedScorerDescriptor(sample).scores();
7
16
 
8
17
  if (scores.length === 1) {
9
18
  return scores[0].rendered();