inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +16 -0
- inspect_ai/_cli/score.py +1 -12
- inspect_ai/_cli/util.py +4 -2
- inspect_ai/_display/core/footer.py +2 -2
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_eval/context.py +7 -1
- inspect_ai/_eval/eval.py +51 -27
- inspect_ai/_eval/evalset.py +27 -10
- inspect_ai/_eval/loader.py +7 -8
- inspect_ai/_eval/run.py +23 -31
- inspect_ai/_eval/score.py +18 -1
- inspect_ai/_eval/task/log.py +5 -13
- inspect_ai/_eval/task/resolved.py +1 -0
- inspect_ai/_eval/task/run.py +231 -244
- inspect_ai/_eval/task/task.py +25 -2
- inspect_ai/_eval/task/util.py +1 -8
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/json.py +8 -3
- inspect_ai/_util/registry.py +30 -13
- inspect_ai/_view/www/App.css +5 -0
- inspect_ai/_view/www/dist/assets/index.css +55 -18
- inspect_ai/_view/www/dist/assets/index.js +550 -458
- inspect_ai/_view/www/log-schema.json +84 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
- inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
- inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
- inspect_ai/_view/www/src/types/log.d.ts +150 -129
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_as_tool.py +1 -1
- inspect_ai/agent/_bridge/bridge.py +9 -2
- inspect_ai/agent/_react.py +142 -74
- inspect_ai/agent/_run.py +13 -2
- inspect_ai/agent/_types.py +6 -0
- inspect_ai/approval/_apply.py +6 -9
- inspect_ai/approval/_approver.py +3 -3
- inspect_ai/approval/_auto.py +2 -2
- inspect_ai/approval/_call.py +20 -4
- inspect_ai/approval/_human/approver.py +3 -3
- inspect_ai/approval/_human/manager.py +2 -2
- inspect_ai/approval/_human/panel.py +3 -3
- inspect_ai/approval/_policy.py +3 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +23 -2
- inspect_ai/log/_model.py +58 -0
- inspect_ai/log/_recorders/file.py +14 -3
- inspect_ai/log/_transcript.py +3 -0
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +15 -2
- inspect_ai/model/_model.py +49 -3
- inspect_ai/model/_openai.py +151 -21
- inspect_ai/model/_providers/anthropic.py +25 -14
- inspect_ai/model/_providers/bedrock.py +3 -3
- inspect_ai/model/_providers/cloudflare.py +29 -108
- inspect_ai/model/_providers/google.py +21 -10
- inspect_ai/model/_providers/grok.py +23 -17
- inspect_ai/model/_providers/groq.py +61 -37
- inspect_ai/model/_providers/llama_cpp_python.py +8 -9
- inspect_ai/model/_providers/mistral.py +8 -3
- inspect_ai/model/_providers/ollama.py +8 -9
- inspect_ai/model/_providers/openai.py +53 -157
- inspect_ai/model/_providers/openai_compatible.py +195 -0
- inspect_ai/model/_providers/openrouter.py +4 -15
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/model/_providers/together.py +25 -23
- inspect_ai/model/_trim.py +83 -0
- inspect_ai/solver/_plan.py +5 -3
- inspect_ai/tool/_tool_call.py +3 -0
- inspect_ai/tool/_tool_def.py +8 -2
- inspect_ai/util/__init__.py +3 -0
- inspect_ai/util/_concurrency.py +15 -2
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/RECORD +86 -81
- inspect_ai/_eval/task/rundir.py +0 -78
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/top_level.txt +0 -0
@@ -1342,6 +1342,43 @@
|
|
1342
1342
|
"type": "object",
|
1343
1343
|
"additionalProperties": false
|
1344
1344
|
},
|
1345
|
+
"EvalModelConfig": {
|
1346
|
+
"description": "Model config.",
|
1347
|
+
"properties": {
|
1348
|
+
"model": {
|
1349
|
+
"title": "Model",
|
1350
|
+
"type": "string"
|
1351
|
+
},
|
1352
|
+
"config": {
|
1353
|
+
"$ref": "#/$defs/GenerateConfig"
|
1354
|
+
},
|
1355
|
+
"base_url": {
|
1356
|
+
"anyOf": [
|
1357
|
+
{
|
1358
|
+
"type": "string"
|
1359
|
+
},
|
1360
|
+
{
|
1361
|
+
"type": "null"
|
1362
|
+
}
|
1363
|
+
],
|
1364
|
+
"default": null,
|
1365
|
+
"title": "Base Url"
|
1366
|
+
},
|
1367
|
+
"args": {
|
1368
|
+
"title": "Args",
|
1369
|
+
"type": "object"
|
1370
|
+
}
|
1371
|
+
},
|
1372
|
+
"required": [
|
1373
|
+
"model",
|
1374
|
+
"config",
|
1375
|
+
"base_url",
|
1376
|
+
"args"
|
1377
|
+
],
|
1378
|
+
"title": "EvalModelConfig",
|
1379
|
+
"type": "object",
|
1380
|
+
"additionalProperties": false
|
1381
|
+
},
|
1345
1382
|
"EvalPlan": {
|
1346
1383
|
"description": "Plan (solvers) used in evaluation.",
|
1347
1384
|
"properties": {
|
@@ -2250,6 +2287,9 @@
|
|
2250
2287
|
"title": "Model",
|
2251
2288
|
"type": "string"
|
2252
2289
|
},
|
2290
|
+
"model_generate_config": {
|
2291
|
+
"$ref": "#/$defs/GenerateConfig"
|
2292
|
+
},
|
2253
2293
|
"model_base_url": {
|
2254
2294
|
"anyOf": [
|
2255
2295
|
{
|
@@ -2266,6 +2306,21 @@
|
|
2266
2306
|
"title": "Model Args",
|
2267
2307
|
"type": "object"
|
2268
2308
|
},
|
2309
|
+
"model_roles": {
|
2310
|
+
"anyOf": [
|
2311
|
+
{
|
2312
|
+
"additionalProperties": {
|
2313
|
+
"$ref": "#/$defs/EvalModelConfig"
|
2314
|
+
},
|
2315
|
+
"type": "object"
|
2316
|
+
},
|
2317
|
+
{
|
2318
|
+
"type": "null"
|
2319
|
+
}
|
2320
|
+
],
|
2321
|
+
"default": null,
|
2322
|
+
"title": "Model Roles"
|
2323
|
+
},
|
2269
2324
|
"config": {
|
2270
2325
|
"$ref": "#/$defs/EvalConfig"
|
2271
2326
|
},
|
@@ -2355,8 +2410,10 @@
|
|
2355
2410
|
"dataset",
|
2356
2411
|
"sandbox",
|
2357
2412
|
"model",
|
2413
|
+
"model_generate_config",
|
2358
2414
|
"model_base_url",
|
2359
2415
|
"model_args",
|
2416
|
+
"model_roles",
|
2360
2417
|
"config",
|
2361
2418
|
"revision",
|
2362
2419
|
"packages",
|
@@ -3306,6 +3363,18 @@
|
|
3306
3363
|
"title": "Model",
|
3307
3364
|
"type": "string"
|
3308
3365
|
},
|
3366
|
+
"role": {
|
3367
|
+
"anyOf": [
|
3368
|
+
{
|
3369
|
+
"type": "string"
|
3370
|
+
},
|
3371
|
+
{
|
3372
|
+
"type": "null"
|
3373
|
+
}
|
3374
|
+
],
|
3375
|
+
"default": null,
|
3376
|
+
"title": "Role"
|
3377
|
+
},
|
3309
3378
|
"input": {
|
3310
3379
|
"items": {
|
3311
3380
|
"anyOf": [
|
@@ -3426,6 +3495,7 @@
|
|
3426
3495
|
"pending",
|
3427
3496
|
"event",
|
3428
3497
|
"model",
|
3498
|
+
"role",
|
3429
3499
|
"input",
|
3430
3500
|
"tools",
|
3431
3501
|
"tool_choice",
|
@@ -4595,6 +4665,18 @@
|
|
4595
4665
|
}
|
4596
4666
|
],
|
4597
4667
|
"default": null
|
4668
|
+
},
|
4669
|
+
"type": {
|
4670
|
+
"anyOf": [
|
4671
|
+
{
|
4672
|
+
"type": "string"
|
4673
|
+
},
|
4674
|
+
{
|
4675
|
+
"type": "null"
|
4676
|
+
}
|
4677
|
+
],
|
4678
|
+
"default": null,
|
4679
|
+
"title": "Type"
|
4598
4680
|
}
|
4599
4681
|
},
|
4600
4682
|
"required": [
|
@@ -4603,7 +4685,8 @@
|
|
4603
4685
|
"arguments",
|
4604
4686
|
"internal",
|
4605
4687
|
"parse_error",
|
4606
|
-
"view"
|
4688
|
+
"view",
|
4689
|
+
"type"
|
4607
4690
|
],
|
4608
4691
|
"title": "ToolCall",
|
4609
4692
|
"type": "object",
|
@@ -6,7 +6,7 @@ import { RenderedContent } from "./RenderedContent";
|
|
6
6
|
interface MetadataViewProps {
|
7
7
|
id?: string;
|
8
8
|
style?: CSSProperties;
|
9
|
-
entries: Record<string, unknown>;
|
9
|
+
entries: Record<string, unknown> | Array<{ name: string; value: unknown }>;
|
10
10
|
tableOptions?: string;
|
11
11
|
compact?: boolean;
|
12
12
|
className?: string | string[];
|
@@ -66,11 +66,6 @@ export const MetaDataView: FC<MetadataViewProps> = ({
|
|
66
66
|
)}
|
67
67
|
style={style}
|
68
68
|
>
|
69
|
-
<thead>
|
70
|
-
<tr>
|
71
|
-
<th colSpan={2} className={"th"}></th>
|
72
|
-
</tr>
|
73
|
-
</thead>
|
74
69
|
<tbody>{entryEls}</tbody>
|
75
70
|
</table>
|
76
71
|
);
|
@@ -80,11 +75,21 @@ export const MetaDataView: FC<MetadataViewProps> = ({
|
|
80
75
|
// or an array of record with name/value on way in
|
81
76
|
// but coerce to array of records for order
|
82
77
|
const toNameValues = (
|
83
|
-
entries?:
|
78
|
+
entries?:
|
79
|
+
| Array<{ name: string; value: unknown }>
|
80
|
+
| Record<string, unknown>
|
81
|
+
| Array<unknown>,
|
84
82
|
): Array<{ name: string; value: unknown }> | undefined => {
|
85
83
|
if (entries) {
|
86
84
|
if (Array.isArray(entries)) {
|
87
|
-
|
85
|
+
// filter arrays that don't contain the expected name value pairs
|
86
|
+
const filtered = entries.filter((entry) => {
|
87
|
+
if (entry && typeof entry === "object") {
|
88
|
+
return "name" in entry && "value" in entry;
|
89
|
+
}
|
90
|
+
return false;
|
91
|
+
});
|
92
|
+
return filtered as Array<{ name: string; value: unknown }>;
|
88
93
|
} else {
|
89
94
|
return Object.entries(entries || {}).map(([key, value]) => {
|
90
95
|
return { name: key, value };
|
@@ -147,6 +147,9 @@ const contentRenderers: Record<string, ContentRenderer> = {
|
|
147
147
|
canRender: (entry) => {
|
148
148
|
const isArray = Array.isArray(entry.value);
|
149
149
|
if (isArray) {
|
150
|
+
if (entry.value.length === 0 || entry.value.length === 1) {
|
151
|
+
return true;
|
152
|
+
}
|
150
153
|
const types = new Set(
|
151
154
|
entry.value
|
152
155
|
.filter((e: unknown) => e !== null)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
.container {
|
2
|
+
display: grid;
|
3
|
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
4
|
+
row-gap: 2em;
|
5
|
+
column-gap: 1em;
|
6
|
+
}
|
7
|
+
|
8
|
+
.modelInfo {
|
9
|
+
display: grid;
|
10
|
+
grid-template-columns: max-content auto;
|
11
|
+
column-gap: 1em;
|
12
|
+
}
|
13
|
+
|
14
|
+
.role {
|
15
|
+
grid-column: -1/1;
|
16
|
+
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
import { FC } from "react";
|
2
|
+
import { ApplicationIcons } from "../appearance/icons";
|
3
|
+
import { Card, CardBody, CardHeader } from "../components/Card";
|
4
|
+
import { EvalModelConfig, EvalSpec } from "../types/log";
|
5
|
+
|
6
|
+
import clsx from "clsx";
|
7
|
+
import { MetaDataGrid } from "../metadata/MetaDataGrid";
|
8
|
+
import styles from "./ModelCard.module.css";
|
9
|
+
|
10
|
+
interface ModelCardProps {
|
11
|
+
evalSpec?: EvalSpec;
|
12
|
+
}
|
13
|
+
|
14
|
+
/**
|
15
|
+
* Renders the plan card
|
16
|
+
*/
|
17
|
+
export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
|
18
|
+
if (!evalSpec) {
|
19
|
+
return undefined;
|
20
|
+
}
|
21
|
+
|
22
|
+
const modelsInfo: Record<string, EvalModelConfig> = {
|
23
|
+
eval: {
|
24
|
+
model: evalSpec.model,
|
25
|
+
base_url: evalSpec.model_base_url,
|
26
|
+
config: evalSpec.model_generate_config,
|
27
|
+
args: evalSpec.model_args,
|
28
|
+
},
|
29
|
+
...evalSpec.model_roles,
|
30
|
+
};
|
31
|
+
|
32
|
+
const noneEl = <span className="text-style-secondary">None</span>;
|
33
|
+
|
34
|
+
return (
|
35
|
+
<Card>
|
36
|
+
<CardHeader icon={ApplicationIcons.model} label="Models" />
|
37
|
+
<CardBody id={"task-model-card-body"}>
|
38
|
+
<div className={styles.container}>
|
39
|
+
{Object.keys(modelsInfo || {}).map((modelKey) => {
|
40
|
+
const modelInfo = modelsInfo[modelKey];
|
41
|
+
return (
|
42
|
+
<div
|
43
|
+
key={modelKey}
|
44
|
+
className={clsx(styles.modelInfo, "text-size-small")}
|
45
|
+
>
|
46
|
+
<div
|
47
|
+
className={clsx(
|
48
|
+
styles.role,
|
49
|
+
"text-style-label",
|
50
|
+
"text-style-secondary",
|
51
|
+
)}
|
52
|
+
>
|
53
|
+
{modelKey}
|
54
|
+
</div>
|
55
|
+
|
56
|
+
<div className={clsx("text-style-label")}>Model</div>
|
57
|
+
<div>{modelInfo.model}</div>
|
58
|
+
|
59
|
+
<div className={clsx("text-style-label")}>Base Url</div>
|
60
|
+
<div className="text-size-small">
|
61
|
+
{modelInfo.base_url || noneEl}
|
62
|
+
</div>
|
63
|
+
<div className={clsx("text-style-label")}>Configuration</div>
|
64
|
+
<div className="text-size-small">
|
65
|
+
{modelInfo.config &&
|
66
|
+
Object.keys(modelInfo.config).length > 0 ? (
|
67
|
+
<MetaDataGrid
|
68
|
+
entries={
|
69
|
+
modelInfo.config as any as Record<string, unknown>
|
70
|
+
}
|
71
|
+
/>
|
72
|
+
) : (
|
73
|
+
noneEl
|
74
|
+
)}
|
75
|
+
</div>
|
76
|
+
<div className={clsx("text-style-label")}>Args</div>
|
77
|
+
<div className="text-size-small">
|
78
|
+
{Object.keys(modelInfo.args).length > 0 ? (
|
79
|
+
<MetaDataGrid
|
80
|
+
entries={modelInfo.args as any as Record<string, unknown>}
|
81
|
+
/>
|
82
|
+
) : (
|
83
|
+
noneEl
|
84
|
+
)}
|
85
|
+
</div>
|
86
|
+
</div>
|
87
|
+
);
|
88
|
+
})}
|
89
|
+
</div>
|
90
|
+
</CardBody>
|
91
|
+
</Card>
|
92
|
+
);
|
93
|
+
};
|
@@ -63,11 +63,15 @@ export const ModelEventView: FC<ModelEventViewProps> = ({
|
|
63
63
|
}
|
64
64
|
}
|
65
65
|
|
66
|
+
const panelTitle = event.role
|
67
|
+
? `Model Call (${event.role}): ${event.model}`
|
68
|
+
: `Model Call: ${event.model}`;
|
69
|
+
|
66
70
|
return (
|
67
71
|
<EventPanel
|
68
72
|
id={id}
|
69
73
|
className={className}
|
70
|
-
title={formatTitle(
|
74
|
+
title={formatTitle(panelTitle, totalUsage, callTime)}
|
71
75
|
subTitle={formatTiming(event.timestamp, event.working_start)}
|
72
76
|
icon={ApplicationIcons.model}
|
73
77
|
>
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { ApplicationIcons } from "../../appearance/icons";
|
3
|
-
import { SampleLimitEvent,
|
3
|
+
import { SampleLimitEvent, Type10 } from "../../types/log";
|
4
4
|
import { EventPanel } from "./event/EventPanel";
|
5
5
|
|
6
6
|
interface SampleLimitEventViewProps {
|
@@ -17,7 +17,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
17
17
|
event,
|
18
18
|
className,
|
19
19
|
}) => {
|
20
|
-
const resolve_title = (type:
|
20
|
+
const resolve_title = (type: Type10) => {
|
21
21
|
switch (type) {
|
22
22
|
case "custom":
|
23
23
|
return "Custom Limit Exceeded";
|
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
34
34
|
}
|
35
35
|
};
|
36
36
|
|
37
|
-
const resolve_icon = (type:
|
37
|
+
const resolve_icon = (type: Type10) => {
|
38
38
|
switch (type) {
|
39
39
|
case "custom":
|
40
40
|
return ApplicationIcons.limits.custom;
|
@@ -276,44 +276,21 @@ function setPath(
|
|
276
276
|
value: unknown,
|
277
277
|
): void {
|
278
278
|
const keys = parsePath(path);
|
279
|
-
let current: Record<string, unknown>
|
279
|
+
let current: Record<string, unknown> = target;
|
280
280
|
|
281
281
|
for (let i = 0; i < keys.length - 1; i++) {
|
282
282
|
const key = keys[i];
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
current[numericIndex] = isArrayIndex(keys[i + 1]) ? [] : {};
|
287
|
-
current = current[numericIndex] as
|
288
|
-
| Record<string, unknown>
|
289
|
-
| Array<unknown>;
|
290
|
-
} else {
|
291
|
-
if (!(key in current)) {
|
292
|
-
// If the next key is a number, create an array, otherwise an object
|
293
|
-
current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
|
294
|
-
}
|
295
|
-
current = current[key] as Record<string, unknown> | Array<unknown>;
|
283
|
+
if (!(key in current)) {
|
284
|
+
// If the next key is a number, create an array, otherwise an object
|
285
|
+
current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
|
296
286
|
}
|
287
|
+
current = current[key] as Record<string, unknown>;
|
297
288
|
}
|
298
289
|
|
299
290
|
const lastKey = keys[keys.length - 1];
|
300
|
-
|
301
|
-
const numericIndex = getIndex(lastKey);
|
302
|
-
current[numericIndex] = value;
|
303
|
-
} else {
|
304
|
-
current[lastKey] = value;
|
305
|
-
}
|
291
|
+
current[lastKey] = value;
|
306
292
|
}
|
307
293
|
|
308
|
-
const getIndex = (key: string): number => {
|
309
|
-
const numericIndex = isArrayIndex(key) ? parseInt(key) : undefined;
|
310
|
-
if (numericIndex === undefined) {
|
311
|
-
throw new Error(`The key ${key} isn't a valid Array index!`);
|
312
|
-
}
|
313
|
-
|
314
|
-
return numericIndex;
|
315
|
-
};
|
316
|
-
|
317
294
|
/**
|
318
295
|
* Places structure in an object (without placing values)
|
319
296
|
*/
|