inspect-ai 0.3.73__py3-none-any.whl → 0.3.75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +3 -2
- inspect_ai/_cli/cache.py +1 -1
- inspect_ai/_cli/common.py +15 -0
- inspect_ai/_cli/eval.py +4 -5
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_cli/sandbox.py +1 -1
- inspect_ai/_cli/trace.py +1 -1
- inspect_ai/_cli/view.py +1 -1
- inspect_ai/_display/core/config.py +3 -1
- inspect_ai/_eval/eval.py +55 -61
- inspect_ai/_eval/evalset.py +63 -154
- inspect_ai/_eval/loader.py +27 -54
- inspect_ai/_eval/registry.py +1 -10
- inspect_ai/_eval/run.py +3 -4
- inspect_ai/_eval/task/__init__.py +8 -2
- inspect_ai/_eval/task/log.py +9 -1
- inspect_ai/_eval/task/resolved.py +35 -0
- inspect_ai/_eval/task/task.py +50 -69
- inspect_ai/_eval/task/tasks.py +30 -0
- inspect_ai/_util/constants.py +3 -0
- inspect_ai/_util/dotenv.py +17 -0
- inspect_ai/_util/registry.py +43 -2
- inspect_ai/_view/server.py +28 -10
- inspect_ai/_view/www/dist/assets/index.css +4 -3
- inspect_ai/_view/www/dist/assets/index.js +13030 -25523
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/appearance/styles.ts +6 -5
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
- inspect_ai/_view/www/src/constants.ts +3 -0
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
- inspect_ai/_view/www/yarn.lock +12 -5
- inspect_ai/log/_log.py +10 -1
- inspect_ai/log/_recorders/eval.py +27 -8
- inspect_ai/log/_recorders/json.py +2 -2
- inspect_ai/model/_cache.py +3 -1
- inspect_ai/model/_chat_message.py +12 -1
- inspect_ai/model/_model.py +25 -11
- inspect_ai/model/_providers/anthropic.py +34 -2
- inspect_ai/model/_providers/google.py +6 -2
- inspect_ai/model/_providers/none.py +31 -0
- inspect_ai/model/_providers/providers.py +7 -0
- inspect_ai/solver/_bridge/bridge.py +1 -1
- inspect_ai/solver/_chain.py +7 -6
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
- inspect_ai/tool/_tools/_web_search.py +2 -2
- inspect_ai/util/_sandbox/context.py +2 -1
- inspect_ai/util/_sandbox/environment.py +17 -2
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/METADATA +4 -4
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/RECORD +63 -60
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/top_level.txt +0 -0
@@ -39,7 +39,7 @@
|
|
39
39
|
"vite": "^5.3.2"
|
40
40
|
},
|
41
41
|
"dependencies": {
|
42
|
-
"@codemirror/autocomplete": "^6.18.
|
42
|
+
"@codemirror/autocomplete": "^6.18.6",
|
43
43
|
"@codemirror/language": "^6.10.8",
|
44
44
|
"@codemirror/lint": "^6.8.4",
|
45
45
|
"@codemirror/state": "^6.5.2",
|
@@ -62,7 +62,7 @@
|
|
62
62
|
"markdown-it": "^14.1.0",
|
63
63
|
"murmurhash": "^2.0.1",
|
64
64
|
"postcss-url": "^10.1.3",
|
65
|
-
"prismjs": "^1.
|
65
|
+
"prismjs": "^1.30.0",
|
66
66
|
"react": "^19.0.0",
|
67
67
|
"react-dom": "^19.0.0",
|
68
68
|
"react-virtuoso": "^4.12.5"
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import { CSSProperties } from "react";
|
1
2
|
import { FontSize, TextStyle } from "./fonts";
|
2
3
|
|
3
4
|
export const ApplicationStyles = {
|
@@ -9,15 +10,15 @@ export const ApplicationStyles = {
|
|
9
10
|
},
|
10
11
|
threeLineClamp: {
|
11
12
|
display: "-webkit-box",
|
12
|
-
|
13
|
-
|
13
|
+
WebkitLineClamp: "3",
|
14
|
+
WebkitBoxOrient: "vertical",
|
14
15
|
overflow: "hidden",
|
15
16
|
},
|
16
|
-
lineClamp: (len: number) => {
|
17
|
+
lineClamp: (len: number): CSSProperties => {
|
17
18
|
return {
|
18
19
|
display: "-webkit-box",
|
19
|
-
|
20
|
-
|
20
|
+
WebkitLineClamp: `${len}`,
|
21
|
+
WebkitBoxOrient: "vertical",
|
21
22
|
overflow: "hidden",
|
22
23
|
};
|
23
24
|
},
|
@@ -20,10 +20,10 @@ export const ANSIDisplay: FC<ANSIDisplayProps> = ({
|
|
20
20
|
let firstOutput = false;
|
21
21
|
return (
|
22
22
|
<div className={clsx("ansi-display", className)} style={{ ...style }}>
|
23
|
-
{ansiOutput.outputLines.map((line) => {
|
23
|
+
{ansiOutput.outputLines.map((line, index) => {
|
24
24
|
firstOutput = firstOutput || !!line.outputRuns.length;
|
25
25
|
return (
|
26
|
-
<div className={"ansi-display-line"}>
|
26
|
+
<div key={index} className={"ansi-display-line"}>
|
27
27
|
{!line.outputRuns.length ? (
|
28
28
|
firstOutput ? (
|
29
29
|
<br />
|
@@ -65,8 +65,54 @@ export const openRemoteZipFile = async (
|
|
65
65
|
);
|
66
66
|
const eocdrView = new DataView(eocdrBuffer.buffer);
|
67
67
|
|
68
|
-
|
69
|
-
|
68
|
+
// Check signature to make sure we found the EOCD record
|
69
|
+
if (eocdrView.getUint32(0, true) !== 0x06054b50) {
|
70
|
+
throw new Error("End of central directory record not found");
|
71
|
+
}
|
72
|
+
|
73
|
+
let centralDirOffset = eocdrView.getUint32(16, true);
|
74
|
+
let centralDirSize = eocdrView.getUint32(12, true);
|
75
|
+
|
76
|
+
// Check if we need to use ZIP64 format
|
77
|
+
const needsZip64 =
|
78
|
+
centralDirOffset === 0xffffffff || centralDirSize === 0xffffffff;
|
79
|
+
|
80
|
+
if (needsZip64) {
|
81
|
+
// We need to locate and read the ZIP64 EOCD record and locator
|
82
|
+
// First, read the ZIP64 EOCD locator which is just before the standard EOCD
|
83
|
+
// Standard EOCD (22 bytes) + Locator (20 bytes)
|
84
|
+
const locatorBuffer = await fetchBytes(
|
85
|
+
url,
|
86
|
+
contentLength - 22 - 20,
|
87
|
+
contentLength - 23,
|
88
|
+
);
|
89
|
+
const locatorView = new DataView(locatorBuffer.buffer);
|
90
|
+
|
91
|
+
// Verify the ZIP64 EOCD locator signature
|
92
|
+
if (locatorView.getUint32(0, true) !== 0x07064b50) {
|
93
|
+
throw new Error("ZIP64 End of central directory locator not found");
|
94
|
+
}
|
95
|
+
|
96
|
+
// Get the offset to the ZIP64 EOCD record
|
97
|
+
const zip64EOCDOffset = Number(locatorView.getBigUint64(8, true));
|
98
|
+
|
99
|
+
// Now read the ZIP64 EOCD record
|
100
|
+
const zip64EOCDBuffer = await fetchBytes(
|
101
|
+
url,
|
102
|
+
zip64EOCDOffset,
|
103
|
+
zip64EOCDOffset + 56,
|
104
|
+
);
|
105
|
+
const zip64EOCDView = new DataView(zip64EOCDBuffer.buffer);
|
106
|
+
|
107
|
+
// Verify the ZIP64 EOCD signature
|
108
|
+
if (zip64EOCDView.getUint32(0, true) !== 0x06064b50) {
|
109
|
+
throw new Error("ZIP64 End of central directory record not found");
|
110
|
+
}
|
111
|
+
|
112
|
+
// Get the 64-bit central directory size and offset
|
113
|
+
centralDirSize = Number(zip64EOCDView.getBigUint64(40, true));
|
114
|
+
centralDirOffset = Number(zip64EOCDView.getBigUint64(48, true));
|
115
|
+
}
|
70
116
|
|
71
117
|
// Fetch and parse the central directory
|
72
118
|
const centralDirBuffer = await fetchBytes(
|
@@ -75,6 +121,7 @@ export const openRemoteZipFile = async (
|
|
75
121
|
centralDirOffset + centralDirSize - 1,
|
76
122
|
);
|
77
123
|
const centralDirectory = parseCentralDirectory(centralDirBuffer);
|
124
|
+
|
78
125
|
return {
|
79
126
|
centralDirectory: centralDirectory,
|
80
127
|
readFile: async (file, maxBytes): Promise<Uint8Array> => {
|
@@ -97,6 +144,8 @@ export const openRemoteZipFile = async (
|
|
97
144
|
|
98
145
|
// 28-29 bytes in local header
|
99
146
|
const extraFieldLength = headerData[28] + (headerData[29] << 8);
|
147
|
+
|
148
|
+
// Use the entry's compressed size from the central directory
|
100
149
|
const totalSizeToFetch =
|
101
150
|
headerSize + filenameLength + extraFieldLength + entry.compressedSize;
|
102
151
|
|
@@ -118,13 +167,13 @@ export const openRemoteZipFile = async (
|
|
118
167
|
// No compression
|
119
168
|
return zipFileEntry.data;
|
120
169
|
} else if (zipFileEntry.compressionMethod === 8) {
|
121
|
-
//
|
170
|
+
// Deflate compression
|
122
171
|
const results = await decompressAsync(zipFileEntry.data, {
|
123
172
|
size: zipFileEntry.uncompressedSize,
|
124
173
|
});
|
125
174
|
return results;
|
126
175
|
} else {
|
127
|
-
throw new Error(`Unsupported
|
176
|
+
throw new Error(`Unsupported compression method for file ${file}`);
|
128
177
|
}
|
129
178
|
},
|
130
179
|
};
|
@@ -194,15 +243,63 @@ const parseZipFileEntry = async (
|
|
194
243
|
offset += 4; // Skip last mod time and date
|
195
244
|
const crc32 = view.getUint32(offset, true);
|
196
245
|
offset += 4;
|
197
|
-
|
246
|
+
|
247
|
+
// Get initial sizes from standard header
|
248
|
+
let compressedSize = view.getUint32(offset, true);
|
198
249
|
offset += 4;
|
199
|
-
|
250
|
+
let uncompressedSize = view.getUint32(offset, true);
|
200
251
|
offset += 4;
|
252
|
+
|
201
253
|
const filenameLength = view.getUint16(offset, true);
|
202
254
|
offset += 2;
|
203
255
|
const extraFieldLength = view.getUint16(offset, true);
|
204
256
|
offset += 2;
|
205
257
|
|
258
|
+
// The original header offset
|
259
|
+
const headerOffset = offset;
|
260
|
+
|
261
|
+
// Check if we need to look for ZIP64 extra fields
|
262
|
+
const needsZip64 =
|
263
|
+
compressedSize === 0xffffffff || uncompressedSize === 0xffffffff;
|
264
|
+
|
265
|
+
if (needsZip64) {
|
266
|
+
// Skip the filename
|
267
|
+
offset += filenameLength;
|
268
|
+
|
269
|
+
// Look through extra fields for ZIP64 data
|
270
|
+
const extraFieldEnd = offset + extraFieldLength;
|
271
|
+
while (offset < extraFieldEnd) {
|
272
|
+
const tag = view.getUint16(offset, true);
|
273
|
+
const size = view.getUint16(offset + 2, true);
|
274
|
+
|
275
|
+
if (tag === 0x0001) {
|
276
|
+
// ZIP64 Extra Field
|
277
|
+
// Position in the extra field data
|
278
|
+
let zip64Offset = offset + 4;
|
279
|
+
|
280
|
+
// Read values in the order they appear in the ZIP64 extra field
|
281
|
+
if (
|
282
|
+
uncompressedSize === 0xffffffff &&
|
283
|
+
zip64Offset + 8 <= extraFieldEnd
|
284
|
+
) {
|
285
|
+
uncompressedSize = Number(view.getBigUint64(zip64Offset, true));
|
286
|
+
zip64Offset += 8;
|
287
|
+
}
|
288
|
+
|
289
|
+
if (compressedSize === 0xffffffff && zip64Offset + 8 <= extraFieldEnd) {
|
290
|
+
compressedSize = Number(view.getBigUint64(zip64Offset, true));
|
291
|
+
}
|
292
|
+
|
293
|
+
break;
|
294
|
+
}
|
295
|
+
offset += 4 + size;
|
296
|
+
}
|
297
|
+
|
298
|
+
// Reset offset
|
299
|
+
offset = headerOffset;
|
300
|
+
}
|
301
|
+
|
302
|
+
// Skip filename and extra field to get to the data
|
206
303
|
offset += filenameLength + extraFieldLength;
|
207
304
|
|
208
305
|
const data = rawData.subarray(offset, offset + compressedSize);
|
@@ -223,7 +320,7 @@ const kFileHeaderSize = 46;
|
|
223
320
|
/**
|
224
321
|
* Parses the central directory of a ZIP file from the provided buffer and returns a map of entries.
|
225
322
|
*/
|
226
|
-
const parseCentralDirectory = (buffer: Uint8Array<ArrayBufferLike>) => {
|
323
|
+
const parseCentralDirectory = (buffer: Uint8Array) => {
|
227
324
|
let offset = 0;
|
228
325
|
const view = new DataView(buffer.buffer);
|
229
326
|
const entries = new Map();
|
@@ -236,6 +333,11 @@ const parseCentralDirectory = (buffer: Uint8Array<ArrayBufferLike>) => {
|
|
236
333
|
const extraFieldLength = view.getUint16(offset + 30, true);
|
237
334
|
const fileCommentLength = view.getUint16(offset + 32, true);
|
238
335
|
|
336
|
+
// Get initial 32-bit values
|
337
|
+
let compressedSize = view.getUint32(offset + 20, true);
|
338
|
+
let uncompressedSize = view.getUint32(offset + 24, true);
|
339
|
+
let fileOffset = view.getUint32(offset + 42, true);
|
340
|
+
|
239
341
|
const filename = new TextDecoder().decode(
|
240
342
|
buffer.subarray(
|
241
343
|
offset + kFileHeaderSize,
|
@@ -243,34 +345,53 @@ const parseCentralDirectory = (buffer: Uint8Array<ArrayBufferLike>) => {
|
|
243
345
|
),
|
244
346
|
);
|
245
347
|
|
246
|
-
//
|
247
|
-
|
348
|
+
// Check if we need to use ZIP64 extra fields
|
349
|
+
const needsZip64 =
|
350
|
+
fileOffset === 0xffffffff ||
|
351
|
+
compressedSize === 0xffffffff ||
|
352
|
+
uncompressedSize === 0xffffffff;
|
248
353
|
|
249
|
-
|
250
|
-
if (fileOffset === 0xffffffff) {
|
354
|
+
if (needsZip64) {
|
251
355
|
// Move to extra field
|
252
356
|
let extraOffset = offset + kFileHeaderSize + filenameLength;
|
357
|
+
const extraEnd = extraOffset + extraFieldLength;
|
358
|
+
|
253
359
|
// Look through extra fields until we find zip64 extra field
|
254
|
-
while (
|
255
|
-
extraOffset <
|
256
|
-
offset + kFileHeaderSize + filenameLength + extraFieldLength
|
257
|
-
) {
|
360
|
+
while (extraOffset < extraEnd) {
|
258
361
|
const tag = view.getUint16(extraOffset, true);
|
259
362
|
const size = view.getUint16(extraOffset + 2, true);
|
363
|
+
|
260
364
|
if (tag === 0x0001) {
|
261
|
-
// ZIP64 Extra Field
|
262
|
-
|
365
|
+
// ZIP64 Extra Field
|
366
|
+
// Position in the extra field data
|
367
|
+
let zip64Offset = extraOffset + 4;
|
368
|
+
|
369
|
+
// Read values in the order they appear in the ZIP64 extra field
|
370
|
+
if (uncompressedSize === 0xffffffff && zip64Offset + 8 <= extraEnd) {
|
371
|
+
uncompressedSize = Number(view.getBigUint64(zip64Offset, true));
|
372
|
+
zip64Offset += 8;
|
373
|
+
}
|
374
|
+
|
375
|
+
if (compressedSize === 0xffffffff && zip64Offset + 8 <= extraEnd) {
|
376
|
+
compressedSize = Number(view.getBigUint64(zip64Offset, true));
|
377
|
+
zip64Offset += 8;
|
378
|
+
}
|
379
|
+
|
380
|
+
if (fileOffset === 0xffffffff && zip64Offset + 8 <= extraEnd) {
|
381
|
+
fileOffset = Number(view.getBigUint64(zip64Offset, true));
|
382
|
+
}
|
383
|
+
|
263
384
|
break;
|
264
385
|
}
|
265
|
-
extraOffset += 4 + size;
|
386
|
+
extraOffset += 4 + size;
|
266
387
|
}
|
267
388
|
}
|
268
389
|
|
269
390
|
const entry = {
|
270
391
|
filename,
|
271
392
|
compressionMethod: view.getUint16(offset + 10, true),
|
272
|
-
compressedSize
|
273
|
-
uncompressedSize
|
393
|
+
compressedSize,
|
394
|
+
uncompressedSize,
|
274
395
|
fileOffset,
|
275
396
|
};
|
276
397
|
|
@@ -8,6 +8,7 @@ import { SolversDetailView } from "./SolverDetailView";
|
|
8
8
|
|
9
9
|
import clsx from "clsx";
|
10
10
|
import { FC, ReactNode } from "react";
|
11
|
+
import { kModelNone } from "../constants";
|
11
12
|
import styles from "./PlanDetailView.module.css";
|
12
13
|
|
13
14
|
interface PlanDetailViewProps {
|
@@ -71,7 +72,7 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
|
|
71
72
|
taskInformation["Tags"] = evaluation.tags.join(", ");
|
72
73
|
}
|
73
74
|
|
74
|
-
if (evaluation?.model) {
|
75
|
+
if (evaluation?.model && evaluation.model !== kModelNone) {
|
75
76
|
config["model"] = evaluation.model;
|
76
77
|
}
|
77
78
|
|
@@ -146,7 +146,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
146
146
|
// TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
|
147
147
|
sampleDescriptor?.selectedScore(sample)?.render() || ""
|
148
148
|
),
|
149
|
-
size: "minmax(2em,
|
149
|
+
size: "minmax(2em, 30em)",
|
150
150
|
center: true,
|
151
151
|
});
|
152
152
|
|
@@ -53,11 +53,13 @@ const extractInput = (
|
|
53
53
|
): { input?: string; args: string[] } => {
|
54
54
|
const formatArg = (key: string, value: unknown) => {
|
55
55
|
const quotedValue =
|
56
|
-
|
57
|
-
?
|
58
|
-
: typeof value === "
|
59
|
-
?
|
60
|
-
:
|
56
|
+
value === null
|
57
|
+
? "None"
|
58
|
+
: typeof value === "string"
|
59
|
+
? `"${value}"`
|
60
|
+
: typeof value === "object" || Array.isArray(value)
|
61
|
+
? JSON.stringify(value, undefined, 2)
|
62
|
+
: String(value);
|
61
63
|
return `${key}: ${quotedValue}`;
|
62
64
|
};
|
63
65
|
if (args) {
|
@@ -15,7 +15,9 @@ export const FlatSampleError: FC<FlatSampleErrorViewProps> = ({ message }) => {
|
|
15
15
|
return (
|
16
16
|
<div className={clsx(styles.flatBody)}>
|
17
17
|
<i className={clsx(ApplicationIcons.error, styles.iconSmall)} />
|
18
|
-
<div className={clsx(styles.lineBase)}>
|
18
|
+
<div className={clsx(styles.lineBase, "text-truncate")}>
|
19
|
+
{errorType(message)}
|
20
|
+
</div>
|
19
21
|
</div>
|
20
22
|
);
|
21
23
|
};
|
@@ -138,8 +138,11 @@ const getLints = (
|
|
138
138
|
if (!filterError) return [];
|
139
139
|
return [
|
140
140
|
{
|
141
|
-
from: filterError.from || 0,
|
142
|
-
to:
|
141
|
+
from: Math.min(filterError.from || 0, view.state.doc.length),
|
142
|
+
to: Math.min(
|
143
|
+
filterError.to || view.state.doc.length,
|
144
|
+
view.state.doc.length,
|
145
|
+
),
|
143
146
|
severity: filterError.severity,
|
144
147
|
message: filterError.message,
|
145
148
|
},
|
@@ -17,7 +17,7 @@ import {
|
|
17
17
|
StoreSpecificRenderableTypes,
|
18
18
|
} from "./StateEventRenderers";
|
19
19
|
|
20
|
-
import {
|
20
|
+
import { FC, useMemo } from "react";
|
21
21
|
import styles from "./StateEventView.module.css";
|
22
22
|
|
23
23
|
interface StateEventViewProps {
|
@@ -246,6 +246,10 @@ const synthesizeComparable = (changes: Changes) => {
|
|
246
246
|
setPath(before, change.path, change.value);
|
247
247
|
break;
|
248
248
|
case "replace":
|
249
|
+
// 'Fill in' arrays with empty strings to ensure there is no unnecessary diff
|
250
|
+
initializeArrays(before, change.path);
|
251
|
+
initializeArrays(after, change.path);
|
252
|
+
|
249
253
|
setPath(before, change.path, change.replaced);
|
250
254
|
setPath(after, change.path, change.value);
|
251
255
|
break;
|
@@ -3,6 +3,7 @@ import { FC, useCallback } from "react";
|
|
3
3
|
import { SampleSummary } from "../../api/types";
|
4
4
|
import { ApplicationIcons } from "../../appearance/icons";
|
5
5
|
import { CopyButton } from "../../components/CopyButton";
|
6
|
+
import { kModelNone } from "../../constants";
|
6
7
|
import { EvalResults, EvalSpec, Status } from "../../types/log";
|
7
8
|
import { filename } from "../../utils/path";
|
8
9
|
import styles from "./PrimaryBar.module.css";
|
@@ -71,18 +72,22 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
|
|
71
72
|
>
|
72
73
|
{evalSpec?.task}
|
73
74
|
</div>
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
75
|
+
{evalSpec?.model && evalSpec.model !== kModelNone ? (
|
76
|
+
<div
|
77
|
+
id="task-model"
|
78
|
+
className={clsx(
|
79
|
+
"task-model",
|
80
|
+
"text-truncate",
|
81
|
+
styles.taskModel,
|
82
|
+
"text-size-base",
|
83
|
+
)}
|
84
|
+
title={evalSpec?.model}
|
85
|
+
>
|
86
|
+
{evalSpec?.model}
|
87
|
+
</div>
|
88
|
+
) : (
|
89
|
+
""
|
90
|
+
)}
|
86
91
|
</div>
|
87
92
|
<div className={clsx("text-size-small", styles.secondaryContainer)}>
|
88
93
|
<div className={clsx("navbar-secondary-text", "text-truncate")}>
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { FC, Fragment } from "react";
|
3
3
|
import { EvalLogHeader } from "../../api/types";
|
4
|
+
import { kModelNone } from "../../constants";
|
4
5
|
import { EvalStatus } from "./EvalStatus";
|
5
6
|
import styles from "./SidebarLogEntry.module.css";
|
6
7
|
|
@@ -51,7 +52,7 @@ export const SidebarLogEntry: FC<SidebarLogEntryProps> = ({
|
|
51
52
|
</div>
|
52
53
|
<small className={clsx("mb-1", "text-size-small")}>{timeStr}</small>
|
53
54
|
|
54
|
-
{model ? (
|
55
|
+
{model && model !== kModelNone ? (
|
55
56
|
<div>
|
56
57
|
<small className={clsx("mb-1", "text-size-small")}>{model}</small>
|
57
58
|
</div>
|
inspect_ai/_view/www/yarn.lock
CHANGED
@@ -220,13 +220,20 @@
|
|
220
220
|
"@codemirror/view" "^6.0.0"
|
221
221
|
crelt "^1.0.5"
|
222
222
|
|
223
|
-
"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0", "@codemirror/state@^6.5.0"
|
223
|
+
"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0", "@codemirror/state@^6.5.0":
|
224
224
|
version "6.5.1"
|
225
225
|
resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.5.1.tgz#e5c0599f7b43cf03f19e05861317df5425c07904"
|
226
226
|
integrity sha512-3rA9lcwciEB47ZevqvD8qgbzhM9qMb8vCcQCNmDfVRPQG4JT9mSb0Jg8H7YjKGGQcFnLN323fj9jdnG59Kx6bg==
|
227
227
|
dependencies:
|
228
228
|
"@marijn/find-cluster-break" "^1.0.0"
|
229
229
|
|
230
|
+
"@codemirror/state@^6.5.2":
|
231
|
+
version "6.5.2"
|
232
|
+
resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.5.2.tgz#8eca3a64212a83367dc85475b7d78d5c9b7076c6"
|
233
|
+
integrity sha512-FVqsPqtPWKVVL3dPSxy8wEF/ymIEuVzF1PK3VbUgrxXpJUSHQWWZz4JMToquRxnkw+36LTamCZG2iua2Ptq0fA==
|
234
|
+
dependencies:
|
235
|
+
"@marijn/find-cluster-break" "^1.0.0"
|
236
|
+
|
230
237
|
"@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
|
231
238
|
version "6.36.2"
|
232
239
|
resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.36.2.tgz#aeb644e161440734ac5a153bf6e5b4a4355047be"
|
@@ -862,10 +869,10 @@ argparse@^2.0.1:
|
|
862
869
|
resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38"
|
863
870
|
integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==
|
864
871
|
|
865
|
-
asciinema-player@^3.
|
866
|
-
version "3.
|
867
|
-
resolved "https://registry.yarnpkg.com/asciinema-player/-/asciinema-player-3.
|
868
|
-
integrity sha512-
|
872
|
+
asciinema-player@^3.9.0:
|
873
|
+
version "3.9.0"
|
874
|
+
resolved "https://registry.yarnpkg.com/asciinema-player/-/asciinema-player-3.9.0.tgz#c60742f85978e861b878fc7eb6289a5622c298af"
|
875
|
+
integrity sha512-SXVFImVzeNr8ZUdNIHABGuzlbnGWTKy245AquAjODsAnv+Lp6vxjYGN0LfA8ns30tnx/ag/bMrTbLq13TpHE6w==
|
869
876
|
dependencies:
|
870
877
|
"@babel/runtime" "^7.21.0"
|
871
878
|
solid-js "^1.3.0"
|
inspect_ai/log/_log.py
CHANGED
@@ -215,7 +215,16 @@ class EvalSample(BaseModel):
|
|
215
215
|
Returns:
|
216
216
|
StoreModel: Instance of model_cls bound to sample store data.
|
217
217
|
"""
|
218
|
-
|
218
|
+
# un-namespace names for creation
|
219
|
+
data = {
|
220
|
+
k.replace(f"{model_cls.__name__}:", "", 1): v for k, v in self.store.items()
|
221
|
+
}
|
222
|
+
|
223
|
+
# since we are reading from the log provide a fully detached store
|
224
|
+
data["store"] = Store()
|
225
|
+
|
226
|
+
# create the model
|
227
|
+
return model_cls.model_validate(data)
|
219
228
|
|
220
229
|
events: list[Event] = Field(default_factory=list)
|
221
230
|
"""Events that occurred during sample execution."""
|
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
|
|
10
10
|
from pydantic_core import to_json
|
11
11
|
from typing_extensions import override
|
12
12
|
|
13
|
-
from inspect_ai._util.constants import LOG_SCHEMA_VERSION
|
13
|
+
from inspect_ai._util.constants import DESERIALIZING_CONTEXT, LOG_SCHEMA_VERSION
|
14
14
|
from inspect_ai._util.content import (
|
15
15
|
ContentAudio,
|
16
16
|
ContentImage,
|
@@ -224,7 +224,9 @@ class EvalRecorder(FileRecorder):
|
|
224
224
|
with ZipFile(z, mode="r") as zip:
|
225
225
|
try:
|
226
226
|
with zip.open(_sample_filename(id, epoch), "r") as f:
|
227
|
-
return EvalSample
|
227
|
+
return EvalSample.model_validate(
|
228
|
+
json.load(f), context=DESERIALIZING_CONTEXT
|
229
|
+
)
|
228
230
|
except KeyError:
|
229
231
|
raise IndexError(
|
230
232
|
f"Sample id {id} for epoch {epoch} not found in log {location}"
|
@@ -414,7 +416,10 @@ def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLo
|
|
414
416
|
if REDUCTIONS_JSON in zip.namelist():
|
415
417
|
with zip.open(REDUCTIONS_JSON, "r") as f:
|
416
418
|
reductions = [
|
417
|
-
EvalSampleReductions
|
419
|
+
EvalSampleReductions.model_validate(
|
420
|
+
reduction, context=DESERIALIZING_CONTEXT
|
421
|
+
)
|
422
|
+
for reduction in json.load(f)
|
418
423
|
]
|
419
424
|
if evalLog.results is not None:
|
420
425
|
evalLog.reductions = reductions
|
@@ -425,7 +430,11 @@ def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLo
|
|
425
430
|
for name in zip.namelist():
|
426
431
|
if name.startswith(f"{SAMPLES_DIR}/") and name.endswith(".json"):
|
427
432
|
with zip.open(name, "r") as f:
|
428
|
-
samples.append(
|
433
|
+
samples.append(
|
434
|
+
EvalSample.model_validate(
|
435
|
+
json.load(f), context=DESERIALIZING_CONTEXT
|
436
|
+
),
|
437
|
+
)
|
429
438
|
sort_samples(samples)
|
430
439
|
evalLog.samples = samples
|
431
440
|
return evalLog
|
@@ -452,7 +461,10 @@ def _read_all_summaries(zip: ZipFile, count: int) -> list[SampleSummary]:
|
|
452
461
|
if SUMMARIES_JSON in zip.namelist():
|
453
462
|
summaries_raw = _read_json(zip, SUMMARIES_JSON)
|
454
463
|
if isinstance(summaries_raw, list):
|
455
|
-
return [
|
464
|
+
return [
|
465
|
+
SampleSummary.model_validate(value, context=DESERIALIZING_CONTEXT)
|
466
|
+
for value in summaries_raw
|
467
|
+
]
|
456
468
|
else:
|
457
469
|
raise ValueError(
|
458
470
|
f"Expected a list of summaries when reading {SUMMARIES_JSON}"
|
@@ -464,7 +476,14 @@ def _read_all_summaries(zip: ZipFile, count: int) -> list[SampleSummary]:
|
|
464
476
|
summary_path = _journal_summary_path(summary_file)
|
465
477
|
summary = _read_json(zip, summary_path)
|
466
478
|
if isinstance(summary, list):
|
467
|
-
summaries.extend(
|
479
|
+
summaries.extend(
|
480
|
+
[
|
481
|
+
SampleSummary.model_validate(
|
482
|
+
value, context=DESERIALIZING_CONTEXT
|
483
|
+
)
|
484
|
+
for value in summary
|
485
|
+
]
|
486
|
+
)
|
468
487
|
else:
|
469
488
|
raise ValueError(
|
470
489
|
f"Expected a list of summaries when reading {summary_file}"
|
@@ -476,12 +495,12 @@ def _read_header(zip: ZipFile, location: str) -> EvalLog:
|
|
476
495
|
# first see if the header is here
|
477
496
|
if HEADER_JSON in zip.namelist():
|
478
497
|
with zip.open(HEADER_JSON, "r") as f:
|
479
|
-
log = EvalLog(
|
498
|
+
log = EvalLog.model_validate(json.load(f), context=DESERIALIZING_CONTEXT)
|
480
499
|
log.location = location
|
481
500
|
return log
|
482
501
|
else:
|
483
502
|
with zip.open(_journal_path(START_JSON), "r") as f:
|
484
|
-
start = LogStart(
|
503
|
+
start = LogStart.model_validate(json.load(f), context=DESERIALIZING_CONTEXT)
|
485
504
|
return EvalLog(
|
486
505
|
version=start.version, eval=start.eval, plan=start.plan, location=location
|
487
506
|
)
|
@@ -7,7 +7,7 @@ from pydantic import BaseModel
|
|
7
7
|
from pydantic_core import from_json
|
8
8
|
from typing_extensions import override
|
9
9
|
|
10
|
-
from inspect_ai._util.constants import LOG_SCHEMA_VERSION
|
10
|
+
from inspect_ai._util.constants import DESERIALIZING_CONTEXT, LOG_SCHEMA_VERSION
|
11
11
|
from inspect_ai._util.error import EvalError
|
12
12
|
from inspect_ai._util.file import absolute_file_path, file
|
13
13
|
from inspect_ai._util.trace import trace_action
|
@@ -143,7 +143,7 @@ class JSONRecorder(FileRecorder):
|
|
143
143
|
with file(location, "r") as f:
|
144
144
|
# parse w/ pydantic
|
145
145
|
raw_data = from_json(f.read())
|
146
|
-
log = EvalLog(
|
146
|
+
log = EvalLog.model_validate(raw_data, context=DESERIALIZING_CONTEXT)
|
147
147
|
log.location = location
|
148
148
|
|
149
149
|
# fail for unknown version
|
inspect_ai/model/_cache.py
CHANGED
@@ -155,7 +155,9 @@ def _cache_key(entry: CacheEntry) -> str:
|
|
155
155
|
exclude=set(["max_retries", "timeout", "max_connections"])
|
156
156
|
)
|
157
157
|
),
|
158
|
-
",".join(
|
158
|
+
",".join(
|
159
|
+
[str(message.model_dump(exclude=set(["id"]))) for message in entry.input]
|
160
|
+
),
|
159
161
|
entry.base_url,
|
160
162
|
entry.tool_choice,
|
161
163
|
entry.tools,
|