inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/plain/display.py +9 -11
  3. inspect_ai/_display/textual/app.py +5 -5
  4. inspect_ai/_display/textual/widgets/samples.py +47 -18
  5. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  6. inspect_ai/_eval/eval.py +14 -2
  7. inspect_ai/_eval/evalset.py +6 -1
  8. inspect_ai/_eval/run.py +6 -0
  9. inspect_ai/_eval/task/run.py +44 -15
  10. inspect_ai/_eval/task/task.py +26 -3
  11. inspect_ai/_util/interrupt.py +15 -0
  12. inspect_ai/_util/logger.py +23 -0
  13. inspect_ai/_util/rich.py +7 -8
  14. inspect_ai/_util/text.py +301 -1
  15. inspect_ai/_util/transcript.py +10 -2
  16. inspect_ai/_util/working.py +46 -0
  17. inspect_ai/_view/www/dist/assets/index.css +56 -12
  18. inspect_ai/_view/www/dist/assets/index.js +905 -751
  19. inspect_ai/_view/www/log-schema.json +337 -2
  20. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  22. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  23. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  25. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  27. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  28. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
  29. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  30. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  31. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  32. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  33. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  34. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  35. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  36. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  37. inspect_ai/_view/www/src/utils/format.ts +7 -4
  38. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  39. inspect_ai/log/__init__.py +2 -0
  40. inspect_ai/log/_condense.py +1 -0
  41. inspect_ai/log/_log.py +72 -12
  42. inspect_ai/log/_samples.py +5 -5
  43. inspect_ai/log/_transcript.py +31 -1
  44. inspect_ai/model/_call_tools.py +1 -1
  45. inspect_ai/model/_conversation.py +1 -1
  46. inspect_ai/model/_model.py +35 -16
  47. inspect_ai/model/_model_call.py +10 -3
  48. inspect_ai/model/_providers/anthropic.py +13 -2
  49. inspect_ai/model/_providers/bedrock.py +7 -0
  50. inspect_ai/model/_providers/cloudflare.py +20 -7
  51. inspect_ai/model/_providers/google.py +358 -302
  52. inspect_ai/model/_providers/groq.py +57 -23
  53. inspect_ai/model/_providers/hf.py +6 -0
  54. inspect_ai/model/_providers/mistral.py +81 -52
  55. inspect_ai/model/_providers/openai.py +9 -0
  56. inspect_ai/model/_providers/providers.py +6 -6
  57. inspect_ai/model/_providers/util/tracker.py +92 -0
  58. inspect_ai/model/_providers/vllm.py +13 -5
  59. inspect_ai/solver/_basic_agent.py +1 -3
  60. inspect_ai/solver/_bridge/patch.py +0 -2
  61. inspect_ai/solver/_limit.py +4 -4
  62. inspect_ai/solver/_plan.py +3 -3
  63. inspect_ai/solver/_solver.py +3 -0
  64. inspect_ai/solver/_task_state.py +10 -1
  65. inspect_ai/tool/_tools/_web_search.py +3 -3
  66. inspect_ai/util/_concurrency.py +14 -8
  67. inspect_ai/util/_sandbox/context.py +15 -0
  68. inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
  69. inspect_ai/util/_sandbox/docker/compose.py +5 -9
  70. inspect_ai/util/_sandbox/docker/docker.py +20 -6
  71. inspect_ai/util/_sandbox/docker/util.py +10 -1
  72. inspect_ai/util/_sandbox/environment.py +32 -1
  73. inspect_ai/util/_sandbox/events.py +149 -0
  74. inspect_ai/util/_sandbox/local.py +3 -3
  75. inspect_ai/util/_sandbox/self_check.py +2 -1
  76. inspect_ai/util/_subprocess.py +4 -1
  77. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
  78. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
  79. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  80. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  81. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  82. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,7 @@ import {
24
24
  } from "../constants";
25
25
  import { EvalSample } from "../types/log";
26
26
  import { ModelTokenTable } from "../usage/ModelTokenTable";
27
+ import { formatTime } from "../utils/format";
27
28
  import { printHeadingHtml, printHtml } from "../utils/print";
28
29
  import { ChatViewVirtualList } from "./chat/ChatViewVirtualList";
29
30
  import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
@@ -180,7 +181,7 @@ export const SampleDisplay: React.FC<SampleDisplayProps> = ({
180
181
  {sampleMetadatas.length > 0 ? (
181
182
  <TabPanel
182
183
  id={kSampleMetdataTabId}
183
- className="sample-tab"
184
+ className={clsx("sample-tab")}
184
185
  title="Metadata"
185
186
  onSelected={onSelectedTab}
186
187
  selected={selectedTab === kSampleMetdataTabId}
@@ -228,6 +229,7 @@ export const SampleDisplay: React.FC<SampleDisplayProps> = ({
228
229
 
229
230
  const metadataViewsForSample = (id: string, sample: EvalSample) => {
230
231
  const sampleMetadatas = [];
232
+
231
233
  if (sample.model_usage && Object.keys(sample.model_usage).length > 0) {
232
234
  sampleMetadatas.push(
233
235
  <Card key={`sample-usage-${id}`}>
@@ -242,6 +244,31 @@ const metadataViewsForSample = (id: string, sample: EvalSample) => {
242
244
  );
243
245
  }
244
246
 
247
+ if (
248
+ sample.total_time !== undefined &&
249
+ sample.total_time !== null &&
250
+ sample.working_time !== undefined &&
251
+ sample.working_time !== null
252
+ ) {
253
+ sampleMetadatas.push(
254
+ <Card key={`sample-time-${id}`}>
255
+ <CardHeader label="Time" />
256
+ <CardBody>
257
+ <div className={clsx(styles.timePanel, "text-size-smaller")}>
258
+ <div className={clsx("text-style-label", "text-style-secondary")}>
259
+ Working
260
+ </div>
261
+ <div>{formatTime(sample.working_time)}</div>
262
+ <div className={clsx("text-style-label", "text-style-secondary")}>
263
+ Total
264
+ </div>
265
+ <div>{formatTime(sample.total_time)}</div>
266
+ </div>
267
+ </CardBody>
268
+ </Card>,
269
+ );
270
+ }
271
+
245
272
  if (Object.keys(sample?.metadata).length > 0) {
246
273
  sampleMetadatas.push(
247
274
  <Card key={`sample-metadata-${id}`}>
@@ -22,3 +22,7 @@
22
22
  .wrap {
23
23
  word-wrap: anywhere;
24
24
  }
25
+
26
+ .titled:hover {
27
+ cursor: pointer;
28
+ }
@@ -1,7 +1,7 @@
1
1
  import clsx from "clsx";
2
2
  import { MarkdownDiv } from "../components/MarkdownDiv";
3
- import { EvalSample } from "../types/log";
4
- import { arrayToString, inputString } from "../utils/format";
3
+ import { EvalSample, WorkingTime } from "../types/log";
4
+ import { arrayToString, formatTime, inputString } from "../utils/format";
5
5
  import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
6
6
  import { FlatSampleError } from "./error/FlatSampleErrorView";
7
7
 
@@ -20,6 +20,7 @@ interface SummaryColumn {
20
20
  size: string;
21
21
  center?: boolean;
22
22
  clamp?: boolean;
23
+ title?: string;
23
24
  }
24
25
 
25
26
  /**
@@ -46,6 +47,7 @@ export const SampleSummaryView: React.FC<SampleSummaryViewProps> = ({
46
47
  sampleDescriptor?.messageShape.normalized.limit > 0
47
48
  ? Math.max(0.15, sampleDescriptor.messageShape.normalized.limit)
48
49
  : 0;
50
+ const timeSize = sample.working_time || sample.total_time ? 0.15 : 0;
49
51
  const idSize = Math.max(
50
52
  2,
51
53
  Math.min(10, sampleDescriptor?.messageShape.raw.id),
@@ -110,6 +112,23 @@ export const SampleSummaryView: React.FC<SampleSummaryViewProps> = ({
110
112
  });
111
113
  }
112
114
 
115
+ const toolTip = (working_time?: WorkingTime) => {
116
+ if (working_time === undefined || working_time === null) {
117
+ return undefined;
118
+ }
119
+ return `Working time: ${formatTime(working_time)}`;
120
+ };
121
+
122
+ if (sample.total_time) {
123
+ columns.push({
124
+ label: "Time",
125
+ value: formatTime(sample.total_time),
126
+ size: `${timeSize}fr`,
127
+ center: true,
128
+ title: toolTip(sample.working_time),
129
+ });
130
+ }
131
+
113
132
  if (sample?.limit && limitSize > 0) {
114
133
  columns.push({
115
134
  label: "Limit",
@@ -151,8 +170,10 @@ export const SampleSummaryView: React.FC<SampleSummaryViewProps> = ({
151
170
  "text-style-label",
152
171
  "text-style-secondary",
153
172
  "text-size-base",
173
+ col.title ? styles.titled : undefined,
154
174
  col.center ? styles.centerLabel : undefined,
155
175
  )}
176
+ title={col.title}
156
177
  >
157
178
  {col.label}
158
179
  </div>
@@ -49,7 +49,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
49
49
  ? value
50
50
  : parseFloat(value === true ? "1" : value),
51
51
  )
52
- : value;
52
+ : String(value);
53
53
  scores.push(
54
54
  <div
55
55
  className={clsx(
@@ -33,6 +33,8 @@ export const SampleLimitEventView: React.FC<SampleLimitEventViewProps> = ({
33
33
  return "Token Limit Exceeded";
34
34
  case "operator":
35
35
  return "Operator Canceled";
36
+ case "working":
37
+ return "Execution Time Limit Exceeded";
36
38
  }
37
39
  };
38
40
 
@@ -48,6 +50,8 @@ export const SampleLimitEventView: React.FC<SampleLimitEventViewProps> = ({
48
50
  return ApplicationIcons.limits.tokens;
49
51
  case "operator":
50
52
  return ApplicationIcons.limits.operator;
53
+ case "working":
54
+ return ApplicationIcons.limits.execution;
51
55
  }
52
56
  };
53
57
 
@@ -0,0 +1,32 @@
1
+ .contents {
2
+ margin-top: 0.5em;
3
+ }
4
+
5
+ .contents > :last-child {
6
+ margin-bottom: 0;
7
+ }
8
+
9
+ .twoColumn {
10
+ display: grid;
11
+ grid-template-columns: auto 1fr;
12
+ column-gap: 1.5em;
13
+ }
14
+
15
+ .exec {
16
+ margin-top: 0.5em;
17
+ }
18
+
19
+ .result {
20
+ margin-top: 0.5em;
21
+ }
22
+
23
+ .fileLabel {
24
+ margin-top: 0;
25
+ margin-bottom: 0;
26
+ }
27
+
28
+ .wrapPre {
29
+ white-space: pre-wrap;
30
+ word-wrap: break-word;
31
+ overflow-wrap: break-word;
32
+ }
@@ -0,0 +1,152 @@
1
+ import { ApplicationIcons } from "../../appearance/icons";
2
+ import ExpandablePanel from "../../components/ExpandablePanel";
3
+ import { MarkdownDiv } from "../../components/MarkdownDiv";
4
+ import { MetaDataGrid } from "../../metadata/MetaDataGrid";
5
+ import { SandboxEvent } from "../../types/log";
6
+ import { formatDateTime } from "../../utils/format";
7
+ import { EventPanel } from "./event/EventPanel";
8
+ import { EventSection } from "./event/EventSection";
9
+ import { TranscriptEventState } from "./types";
10
+
11
+ import clsx from "clsx";
12
+ import styles from "./SandboxEventView.module.css";
13
+
14
+ interface SandboxEventViewProps {
15
+ id: string;
16
+ event: SandboxEvent;
17
+ eventState: TranscriptEventState;
18
+ setEventState: (state: TranscriptEventState) => void;
19
+ className?: string | string[];
20
+ }
21
+
22
+ /**
23
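+ * Renders a transcript panel for a sandbox event: an exec command view, a read-file view, or a write-file view, chosen by the event's action.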
24
+ */
25
+ export const SandboxEventView: React.FC<SandboxEventViewProps> = ({
26
+ id,
27
+ event,
28
+ eventState,
29
+ setEventState,
30
+ className,
31
+ }) => {
32
+ return (
33
+ <EventPanel
34
+ id={id}
35
+ className={className}
36
+ title={`Sandbox: ${event.action}`}
37
+ icon={ApplicationIcons.sandbox}
38
+ subTitle={formatDateTime(new Date(event.timestamp))}
39
+ selectedNav={eventState.selectedNav || ""}
40
+ setSelectedNav={(selectedNav) => {
41
+ setEventState({ ...eventState, selectedNav });
42
+ }}
43
+ collapsed={eventState.collapsed}
44
+ setCollapsed={(collapsed) => {
45
+ setEventState({ ...eventState, collapsed });
46
+ }}
47
+ >
48
+ {event.action === "exec" ? (
49
+ <ExecView event={event} />
50
+ ) : event.action === "read_file" ? (
51
+ <ReadFileView event={event} />
52
+ ) : (
53
+ <WriteFileView event={event} />
54
+ )}
55
+ </EventPanel>
56
+ );
57
+ };
58
+
59
+ interface ExecViewProps {
60
+ event: SandboxEvent;
61
+ }
62
+
63
+ const ExecView: React.FC<ExecViewProps> = ({ event }) => {
64
+ if (event.cmd === null) {
65
+ return undefined;
66
+ }
67
+ const cmd = event.cmd;
68
+ const options = event.options;
69
+ const input = event.input;
70
+ const result = event.result;
71
+ const output = event.output;
72
+
73
+ return (
74
+ <div className={clsx(styles.exec)}>
75
+ <EventSection title={`Command`}>
76
+ <div className={clsx(styles.twoColumn)}>
77
+ <pre className={clsx(styles.wrapPre)}>{cmd}</pre>
78
+ <pre className={clsx(styles.wrapPre)}>
79
+ {input !== null ? input?.trim() : undefined}
80
+ </pre>
81
+
82
+ {options !== null ? (
83
+ <EventSection title={`Options`}>
84
+ <MetaDataGrid
85
+ entries={options as Record<string, unknown>}
86
+ plain={true}
87
+ />
88
+ </EventSection>
89
+ ) : undefined}
90
+ </div>
91
+ </EventSection>
92
+ <EventSection title={`Result`}>
93
+ {output ? (
94
+ <ExpandablePanel collapse={false}>
95
+ <MarkdownDiv markdown={output} />
96
+ </ExpandablePanel>
97
+ ) : undefined}
98
+ <div className={clsx(styles.result)}>Exited with code {result}</div>
99
+ </EventSection>
100
+ </div>
101
+ );
102
+ };
103
+
104
+ interface ReadFileViewProps {
105
+ event: SandboxEvent;
106
+ }
107
+
108
+ const ReadFileView: React.FC<ReadFileViewProps> = ({ event }) => {
109
+ if (event.file === null) {
110
+ return undefined;
111
+ }
112
+ const file = event.file;
113
+ const output = event.output;
114
+ return <FileView file={file} contents={output?.trim()} />;
115
+ };
116
+
117
+ interface WriteFileViewProps {
118
+ event: SandboxEvent;
119
+ }
120
+
121
+ const WriteFileView: React.FC<WriteFileViewProps> = ({ event }) => {
122
+ if (event.file === null) {
123
+ return undefined;
124
+ }
125
+ const file = event.file;
126
+ const input = event.input;
127
+
128
+ return <FileView file={file} contents={input?.trim()} />;
129
+ };
130
+
131
+ interface FileViewProps {
132
+ file: string;
133
+ contents?: string;
134
+ }
135
+
136
+ const FileView: React.FC<FileViewProps> = ({ file, contents }) => {
137
+ return (
138
+ <div>
139
+ <EventSection title="File">
140
+ <pre className={clsx(styles.fileLabel)}>{file}</pre>
141
+ </EventSection>
142
+
143
+ {contents ? (
144
+ <EventSection title="Contents">
145
+ <ExpandablePanel collapse={false}>
146
+ <pre>{contents}</pre>
147
+ </ExpandablePanel>
148
+ </EventSection>
149
+ ) : undefined}
150
+ </div>
151
+ );
152
+ };
@@ -47,7 +47,7 @@ export const StepEventView: React.FC<StepEventViewProps> = ({
47
47
  title={title}
48
48
  subTitle={formatDateTime(new Date(event.timestamp))}
49
49
  icon={descriptor.icon}
50
- collapse={false}
50
+ collapse={descriptor.collapse}
51
51
  text={text}
52
52
  selectedNav={eventState.selectedNav || ""}
53
53
  setSelectedNav={(selectedNav) => {
@@ -115,7 +115,7 @@ const summarize = (children: EventNode[]) => {
115
115
  */
116
116
  const stepDescriptor = (
117
117
  event: StepEvent,
118
- ): { icon?: string; name?: string; endSpace?: boolean } => {
118
+ ): { icon?: string; name?: string; endSpace?: boolean; collapse?: boolean } => {
119
119
  const rootStepDescriptor = {
120
120
  endSpace: true,
121
121
  };
@@ -161,6 +161,13 @@ const stepDescriptor = (
161
161
  return {
162
162
  ...rootStepDescriptor,
163
163
  name: "Sample Init",
164
+ collapse: true,
165
+ };
166
+ case "init":
167
+ return {
168
+ ...rootStepDescriptor,
169
+ name: "Init",
170
+ collapse: true,
164
171
  };
165
172
  default:
166
173
  return {
@@ -17,6 +17,7 @@ import { ToolEventView } from "./ToolEventView";
17
17
  import { EventNode, EventType, TranscriptEventState } from "./types";
18
18
 
19
19
  import clsx from "clsx";
20
+ import { SandboxEventView } from "./SandboxEventView";
20
21
  import styles from "./TranscriptView.module.css";
21
22
 
22
23
  interface TranscriptViewProps {
@@ -373,6 +374,17 @@ export const RenderedEventNode: React.FC<RenderedEventNodeProps> = ({
373
374
  case "approval":
374
375
  return <ApprovalEventView event={node.event} className={className} />;
375
376
 
377
+ case "sandbox":
378
+ return (
379
+ <SandboxEventView
380
+ id={id}
381
+ event={node.event}
382
+ className={className}
383
+ eventState={eventState}
384
+ setEventState={setEventState}
385
+ />
386
+ );
387
+
376
388
  default:
377
389
  return null;
378
390
  }
@@ -390,8 +402,14 @@ const fixupEventStream = (events: Events) => {
390
402
  // Filter pending events
391
403
  const finalEvents = events.filter((e) => !e.pending);
392
404
 
405
+ // See if we can find an init step
406
+ const hasInitStep =
407
+ events.findIndex((e) => {
408
+ return e.event === "step" && e.name === "init";
409
+ }) !== -1;
410
+
393
411
  const fixedUp = [...finalEvents];
394
- if (initEvent) {
412
+ if (!hasInitStep && initEvent) {
395
413
  fixedUp.splice(initEventIndex, 0, {
396
414
  timestamp: initEvent.timestamp,
397
415
  event: "step",
@@ -55,16 +55,19 @@ export const EventPanel: React.FC<EventPanelProps> = ({
55
55
  const defaultPillId = pillId(0);
56
56
 
57
57
  const gridColumns = [];
58
+
59
+ // chevron
58
60
  if (hasCollapse) {
59
61
  gridColumns.push("minmax(0, max-content)");
60
62
  }
63
+
64
+ // icon
61
65
  if (icon) {
62
66
  gridColumns.push("max-content");
63
67
  }
68
+
69
+ // title
64
70
  gridColumns.push("minmax(0, max-content)");
65
- if (subTitle) {
66
- gridColumns.push("minmax(0, max-content)");
67
- }
68
71
  gridColumns.push("auto");
69
72
  gridColumns.push("minmax(0, max-content)");
70
73
  gridColumns.push("minmax(0, max-content)");
@@ -8,6 +8,7 @@ import {
8
8
  ModelEvent,
9
9
  SampleInitEvent,
10
10
  SampleLimitEvent,
11
+ SandboxEvent,
11
12
  ScoreEvent,
12
13
  StateEvent,
13
14
  StepEvent,
@@ -37,7 +38,8 @@ export type EventType =
37
38
  | ToolEvent
38
39
  | InputEvent
39
40
  | ErrorEvent
40
- | ApprovalEvent;
41
+ | ApprovalEvent
42
+ | SandboxEvent;
41
43
 
42
44
  export class EventNode {
43
45
  event: EventType;