inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. inspect_ai/_cli/eval.py +16 -0
  2. inspect_ai/_cli/score.py +1 -12
  3. inspect_ai/_cli/util.py +4 -2
  4. inspect_ai/_display/core/footer.py +2 -2
  5. inspect_ai/_display/plain/display.py +2 -2
  6. inspect_ai/_eval/context.py +7 -1
  7. inspect_ai/_eval/eval.py +51 -27
  8. inspect_ai/_eval/evalset.py +27 -10
  9. inspect_ai/_eval/loader.py +7 -8
  10. inspect_ai/_eval/run.py +23 -31
  11. inspect_ai/_eval/score.py +18 -1
  12. inspect_ai/_eval/task/log.py +5 -13
  13. inspect_ai/_eval/task/resolved.py +1 -0
  14. inspect_ai/_eval/task/run.py +231 -244
  15. inspect_ai/_eval/task/task.py +25 -2
  16. inspect_ai/_eval/task/util.py +1 -8
  17. inspect_ai/_util/constants.py +1 -0
  18. inspect_ai/_util/json.py +8 -3
  19. inspect_ai/_util/registry.py +30 -13
  20. inspect_ai/_view/www/App.css +5 -0
  21. inspect_ai/_view/www/dist/assets/index.css +55 -18
  22. inspect_ai/_view/www/dist/assets/index.js +550 -458
  23. inspect_ai/_view/www/log-schema.json +84 -1
  24. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
  25. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
  26. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
  27. inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
  28. inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
  29. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
  30. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  31. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
  32. inspect_ai/_view/www/src/types/log.d.ts +150 -129
  33. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
  34. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
  35. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
  36. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
  37. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
  38. inspect_ai/agent/_agent.py +12 -0
  39. inspect_ai/agent/_as_tool.py +1 -1
  40. inspect_ai/agent/_bridge/bridge.py +9 -2
  41. inspect_ai/agent/_react.py +142 -74
  42. inspect_ai/agent/_run.py +13 -2
  43. inspect_ai/agent/_types.py +6 -0
  44. inspect_ai/approval/_apply.py +6 -9
  45. inspect_ai/approval/_approver.py +3 -3
  46. inspect_ai/approval/_auto.py +2 -2
  47. inspect_ai/approval/_call.py +20 -4
  48. inspect_ai/approval/_human/approver.py +3 -3
  49. inspect_ai/approval/_human/manager.py +2 -2
  50. inspect_ai/approval/_human/panel.py +3 -3
  51. inspect_ai/approval/_policy.py +3 -3
  52. inspect_ai/log/__init__.py +2 -0
  53. inspect_ai/log/_log.py +23 -2
  54. inspect_ai/log/_model.py +58 -0
  55. inspect_ai/log/_recorders/file.py +14 -3
  56. inspect_ai/log/_transcript.py +3 -0
  57. inspect_ai/model/__init__.py +2 -0
  58. inspect_ai/model/_call_tools.py +15 -2
  59. inspect_ai/model/_model.py +49 -3
  60. inspect_ai/model/_openai.py +151 -21
  61. inspect_ai/model/_providers/anthropic.py +25 -14
  62. inspect_ai/model/_providers/bedrock.py +3 -3
  63. inspect_ai/model/_providers/cloudflare.py +29 -108
  64. inspect_ai/model/_providers/google.py +21 -10
  65. inspect_ai/model/_providers/grok.py +23 -17
  66. inspect_ai/model/_providers/groq.py +61 -37
  67. inspect_ai/model/_providers/llama_cpp_python.py +8 -9
  68. inspect_ai/model/_providers/mistral.py +8 -3
  69. inspect_ai/model/_providers/ollama.py +8 -9
  70. inspect_ai/model/_providers/openai.py +53 -157
  71. inspect_ai/model/_providers/openai_compatible.py +195 -0
  72. inspect_ai/model/_providers/openrouter.py +4 -15
  73. inspect_ai/model/_providers/providers.py +11 -0
  74. inspect_ai/model/_providers/together.py +25 -23
  75. inspect_ai/model/_trim.py +83 -0
  76. inspect_ai/solver/_plan.py +5 -3
  77. inspect_ai/tool/_tool_call.py +3 -0
  78. inspect_ai/tool/_tool_def.py +8 -2
  79. inspect_ai/util/__init__.py +3 -0
  80. inspect_ai/util/_concurrency.py +15 -2
  81. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/METADATA +1 -1
  82. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/RECORD +86 -81
  83. inspect_ai/_eval/task/rundir.py +0 -78
  84. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  85. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/WHEEL +0 -0
  86. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/entry_points.txt +0 -0
  87. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/licenses/LICENSE +0 -0
  88. {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/top_level.txt +0 -0
@@ -1342,6 +1342,43 @@
1342
1342
  "type": "object",
1343
1343
  "additionalProperties": false
1344
1344
  },
1345
+ "EvalModelConfig": {
1346
+ "description": "Model config.",
1347
+ "properties": {
1348
+ "model": {
1349
+ "title": "Model",
1350
+ "type": "string"
1351
+ },
1352
+ "config": {
1353
+ "$ref": "#/$defs/GenerateConfig"
1354
+ },
1355
+ "base_url": {
1356
+ "anyOf": [
1357
+ {
1358
+ "type": "string"
1359
+ },
1360
+ {
1361
+ "type": "null"
1362
+ }
1363
+ ],
1364
+ "default": null,
1365
+ "title": "Base Url"
1366
+ },
1367
+ "args": {
1368
+ "title": "Args",
1369
+ "type": "object"
1370
+ }
1371
+ },
1372
+ "required": [
1373
+ "model",
1374
+ "config",
1375
+ "base_url",
1376
+ "args"
1377
+ ],
1378
+ "title": "EvalModelConfig",
1379
+ "type": "object",
1380
+ "additionalProperties": false
1381
+ },
1345
1382
  "EvalPlan": {
1346
1383
  "description": "Plan (solvers) used in evaluation.",
1347
1384
  "properties": {
@@ -2250,6 +2287,9 @@
2250
2287
  "title": "Model",
2251
2288
  "type": "string"
2252
2289
  },
2290
+ "model_generate_config": {
2291
+ "$ref": "#/$defs/GenerateConfig"
2292
+ },
2253
2293
  "model_base_url": {
2254
2294
  "anyOf": [
2255
2295
  {
@@ -2266,6 +2306,21 @@
2266
2306
  "title": "Model Args",
2267
2307
  "type": "object"
2268
2308
  },
2309
+ "model_roles": {
2310
+ "anyOf": [
2311
+ {
2312
+ "additionalProperties": {
2313
+ "$ref": "#/$defs/EvalModelConfig"
2314
+ },
2315
+ "type": "object"
2316
+ },
2317
+ {
2318
+ "type": "null"
2319
+ }
2320
+ ],
2321
+ "default": null,
2322
+ "title": "Model Roles"
2323
+ },
2269
2324
  "config": {
2270
2325
  "$ref": "#/$defs/EvalConfig"
2271
2326
  },
@@ -2355,8 +2410,10 @@
2355
2410
  "dataset",
2356
2411
  "sandbox",
2357
2412
  "model",
2413
+ "model_generate_config",
2358
2414
  "model_base_url",
2359
2415
  "model_args",
2416
+ "model_roles",
2360
2417
  "config",
2361
2418
  "revision",
2362
2419
  "packages",
@@ -3306,6 +3363,18 @@
3306
3363
  "title": "Model",
3307
3364
  "type": "string"
3308
3365
  },
3366
+ "role": {
3367
+ "anyOf": [
3368
+ {
3369
+ "type": "string"
3370
+ },
3371
+ {
3372
+ "type": "null"
3373
+ }
3374
+ ],
3375
+ "default": null,
3376
+ "title": "Role"
3377
+ },
3309
3378
  "input": {
3310
3379
  "items": {
3311
3380
  "anyOf": [
@@ -3426,6 +3495,7 @@
3426
3495
  "pending",
3427
3496
  "event",
3428
3497
  "model",
3498
+ "role",
3429
3499
  "input",
3430
3500
  "tools",
3431
3501
  "tool_choice",
@@ -4595,6 +4665,18 @@
4595
4665
  }
4596
4666
  ],
4597
4667
  "default": null
4668
+ },
4669
+ "type": {
4670
+ "anyOf": [
4671
+ {
4672
+ "type": "string"
4673
+ },
4674
+ {
4675
+ "type": "null"
4676
+ }
4677
+ ],
4678
+ "default": null,
4679
+ "title": "Type"
4598
4680
  }
4599
4681
  },
4600
4682
  "required": [
@@ -4603,7 +4685,8 @@
4603
4685
  "arguments",
4604
4686
  "internal",
4605
4687
  "parse_error",
4606
- "view"
4688
+ "view",
4689
+ "type"
4607
4690
  ],
4608
4691
  "title": "ToolCall",
4609
4692
  "type": "object",
@@ -9,7 +9,7 @@
9
9
  }
10
10
 
11
11
  .cell {
12
- padding: 0.3em 0.3em 0.3em 0em;
12
+ padding: 0em 0.5em 0.3em 0em !important;
13
13
  }
14
14
 
15
15
  .compact .cell {
@@ -6,7 +6,7 @@ import { RenderedContent } from "./RenderedContent";
6
6
  interface MetadataViewProps {
7
7
  id?: string;
8
8
  style?: CSSProperties;
9
- entries: Record<string, unknown>;
9
+ entries: Record<string, unknown> | Array<{ name: string; value: unknown }>;
10
10
  tableOptions?: string;
11
11
  compact?: boolean;
12
12
  className?: string | string[];
@@ -66,11 +66,6 @@ export const MetaDataView: FC<MetadataViewProps> = ({
66
66
  )}
67
67
  style={style}
68
68
  >
69
- <thead>
70
- <tr>
71
- <th colSpan={2} className={"th"}></th>
72
- </tr>
73
- </thead>
74
69
  <tbody>{entryEls}</tbody>
75
70
  </table>
76
71
  );
@@ -80,11 +75,21 @@ export const MetaDataView: FC<MetadataViewProps> = ({
80
75
  // or an array of record with name/value on way in
81
76
  // but coerce to array of records for order
82
77
  const toNameValues = (
83
- entries?: Array<{ name: string; value: unknown }> | Record<string, unknown>,
78
+ entries?:
79
+ | Array<{ name: string; value: unknown }>
80
+ | Record<string, unknown>
81
+ | Array<unknown>,
84
82
  ): Array<{ name: string; value: unknown }> | undefined => {
85
83
  if (entries) {
86
84
  if (Array.isArray(entries)) {
87
- return entries;
85
+ // filter arrays that don't contain the expected name value pairs
86
+ const filtered = entries.filter((entry) => {
87
+ if (entry && typeof entry === "object") {
88
+ return "name" in entry && "value" in entry;
89
+ }
90
+ return false;
91
+ });
92
+ return filtered as Array<{ name: string; value: unknown }>;
88
93
  } else {
89
94
  return Object.entries(entries || {}).map(([key, value]) => {
90
95
  return { name: key, value };
@@ -147,6 +147,9 @@ const contentRenderers: Record<string, ContentRenderer> = {
147
147
  canRender: (entry) => {
148
148
  const isArray = Array.isArray(entry.value);
149
149
  if (isArray) {
150
+ if (entry.value.length === 0 || entry.value.length === 1) {
151
+ return true;
152
+ }
150
153
  const types = new Set(
151
154
  entry.value
152
155
  .filter((e: unknown) => e !== null)
@@ -0,0 +1,16 @@
1
+ .container {
2
+ display: grid;
3
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
4
+ row-gap: 2em;
5
+ column-gap: 1em;
6
+ }
7
+
8
+ .modelInfo {
9
+ display: grid;
10
+ grid-template-columns: max-content auto;
11
+ column-gap: 1em;
12
+ }
13
+
14
+ .role {
15
+ grid-column: -1/1;
16
+ }
@@ -0,0 +1,93 @@
1
+ import { FC } from "react";
2
+ import { ApplicationIcons } from "../appearance/icons";
3
+ import { Card, CardBody, CardHeader } from "../components/Card";
4
+ import { EvalModelConfig, EvalSpec } from "../types/log";
5
+
6
+ import clsx from "clsx";
7
+ import { MetaDataGrid } from "../metadata/MetaDataGrid";
8
+ import styles from "./ModelCard.module.css";
9
+
10
+ interface ModelCardProps {
11
+ evalSpec?: EvalSpec;
12
+ }
13
+
14
+ /**
15
+ * Renders the plan card
16
+ */
17
+ export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
18
+ if (!evalSpec) {
19
+ return undefined;
20
+ }
21
+
22
+ const modelsInfo: Record<string, EvalModelConfig> = {
23
+ eval: {
24
+ model: evalSpec.model,
25
+ base_url: evalSpec.model_base_url,
26
+ config: evalSpec.model_generate_config,
27
+ args: evalSpec.model_args,
28
+ },
29
+ ...evalSpec.model_roles,
30
+ };
31
+
32
+ const noneEl = <span className="text-style-secondary">None</span>;
33
+
34
+ return (
35
+ <Card>
36
+ <CardHeader icon={ApplicationIcons.model} label="Models" />
37
+ <CardBody id={"task-model-card-body"}>
38
+ <div className={styles.container}>
39
+ {Object.keys(modelsInfo || {}).map((modelKey) => {
40
+ const modelInfo = modelsInfo[modelKey];
41
+ return (
42
+ <div
43
+ key={modelKey}
44
+ className={clsx(styles.modelInfo, "text-size-small")}
45
+ >
46
+ <div
47
+ className={clsx(
48
+ styles.role,
49
+ "text-style-label",
50
+ "text-style-secondary",
51
+ )}
52
+ >
53
+ {modelKey}
54
+ </div>
55
+
56
+ <div className={clsx("text-style-label")}>Model</div>
57
+ <div>{modelInfo.model}</div>
58
+
59
+ <div className={clsx("text-style-label")}>Base Url</div>
60
+ <div className="text-size-small">
61
+ {modelInfo.base_url || noneEl}
62
+ </div>
63
+ <div className={clsx("text-style-label")}>Configuration</div>
64
+ <div className="text-size-small">
65
+ {modelInfo.config &&
66
+ Object.keys(modelInfo.config).length > 0 ? (
67
+ <MetaDataGrid
68
+ entries={
69
+ modelInfo.config as any as Record<string, unknown>
70
+ }
71
+ />
72
+ ) : (
73
+ noneEl
74
+ )}
75
+ </div>
76
+ <div className={clsx("text-style-label")}>Args</div>
77
+ <div className="text-size-small">
78
+ {Object.keys(modelInfo.args).length > 0 ? (
79
+ <MetaDataGrid
80
+ entries={modelInfo.args as any as Record<string, unknown>}
81
+ />
82
+ ) : (
83
+ noneEl
84
+ )}
85
+ </div>
86
+ </div>
87
+ );
88
+ })}
89
+ </div>
90
+ </CardBody>
91
+ </Card>
92
+ );
93
+ };
@@ -63,11 +63,15 @@ export const ModelEventView: FC<ModelEventViewProps> = ({
63
63
  }
64
64
  }
65
65
 
66
+ const panelTitle = event.role
67
+ ? `Model Call (${event.role}): ${event.model}`
68
+ : `Model Call: ${event.model}`;
69
+
66
70
  return (
67
71
  <EventPanel
68
72
  id={id}
69
73
  className={className}
70
- title={formatTitle(`Model Call: ${event.model}`, totalUsage, callTime)}
74
+ title={formatTitle(panelTitle, totalUsage, callTime)}
71
75
  subTitle={formatTiming(event.timestamp, event.working_start)}
72
76
  icon={ApplicationIcons.model}
73
77
  >
@@ -1,6 +1,6 @@
1
1
  import { FC } from "react";
2
2
  import { ApplicationIcons } from "../../appearance/icons";
3
- import { SampleLimitEvent, Type9 } from "../../types/log";
3
+ import { SampleLimitEvent, Type10 } from "../../types/log";
4
4
  import { EventPanel } from "./event/EventPanel";
5
5
 
6
6
  interface SampleLimitEventViewProps {
@@ -17,7 +17,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
17
17
  event,
18
18
  className,
19
19
  }) => {
20
- const resolve_title = (type: Type9) => {
20
+ const resolve_title = (type: Type10) => {
21
21
  switch (type) {
22
22
  case "custom":
23
23
  return "Custom Limit Exceeded";
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
34
34
  }
35
35
  };
36
36
 
37
- const resolve_icon = (type: Type9) => {
37
+ const resolve_icon = (type: Type10) => {
38
38
  switch (type) {
39
39
  case "custom":
40
40
  return ApplicationIcons.limits.custom;
@@ -276,44 +276,21 @@ function setPath(
276
276
  value: unknown,
277
277
  ): void {
278
278
  const keys = parsePath(path);
279
- let current: Record<string, unknown> | unknown[] = target;
279
+ let current: Record<string, unknown> = target;
280
280
 
281
281
  for (let i = 0; i < keys.length - 1; i++) {
282
282
  const key = keys[i];
283
-
284
- if (Array.isArray(current)) {
285
- const numericIndex = getIndex(key);
286
- current[numericIndex] = isArrayIndex(keys[i + 1]) ? [] : {};
287
- current = current[numericIndex] as
288
- | Record<string, unknown>
289
- | Array<unknown>;
290
- } else {
291
- if (!(key in current)) {
292
- // If the next key is a number, create an array, otherwise an object
293
- current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
294
- }
295
- current = current[key] as Record<string, unknown> | Array<unknown>;
283
+ if (!(key in current)) {
284
+ // If the next key is a number, create an array, otherwise an object
285
+ current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
296
286
  }
287
+ current = current[key] as Record<string, unknown>;
297
288
  }
298
289
 
299
290
  const lastKey = keys[keys.length - 1];
300
- if (Array.isArray(current)) {
301
- const numericIndex = getIndex(lastKey);
302
- current[numericIndex] = value;
303
- } else {
304
- current[lastKey] = value;
305
- }
291
+ current[lastKey] = value;
306
292
  }
307
293
 
308
- const getIndex = (key: string): number => {
309
- const numericIndex = isArrayIndex(key) ? parseInt(key) : undefined;
310
- if (numericIndex === undefined) {
311
- throw new Error(`The key ${key} isn't a valid Array index!`);
312
- }
313
-
314
- return numericIndex;
315
- };
316
-
317
294
  /**
318
295
  * Places structure in an object (without placing values)
319
296
  */