inspect-ai 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. inspect_ai/_cli/eval.py +16 -0
  2. inspect_ai/_cli/score.py +1 -12
  3. inspect_ai/_cli/util.py +4 -2
  4. inspect_ai/_display/core/footer.py +2 -2
  5. inspect_ai/_display/plain/display.py +2 -2
  6. inspect_ai/_eval/context.py +7 -1
  7. inspect_ai/_eval/eval.py +51 -27
  8. inspect_ai/_eval/evalset.py +27 -10
  9. inspect_ai/_eval/loader.py +7 -8
  10. inspect_ai/_eval/run.py +23 -31
  11. inspect_ai/_eval/score.py +18 -1
  12. inspect_ai/_eval/task/log.py +5 -13
  13. inspect_ai/_eval/task/resolved.py +1 -0
  14. inspect_ai/_eval/task/run.py +231 -256
  15. inspect_ai/_eval/task/task.py +25 -2
  16. inspect_ai/_eval/task/util.py +1 -8
  17. inspect_ai/_util/constants.py +1 -0
  18. inspect_ai/_util/json.py +8 -3
  19. inspect_ai/_util/registry.py +30 -13
  20. inspect_ai/_view/www/App.css +5 -0
  21. inspect_ai/_view/www/dist/assets/index.css +71 -36
  22. inspect_ai/_view/www/dist/assets/index.js +573 -475
  23. inspect_ai/_view/www/log-schema.json +66 -0
  24. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
  25. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
  26. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
  27. inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
  28. inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
  29. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -2
  30. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
  31. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
  32. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -6
  33. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +0 -2
  34. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
  35. inspect_ai/_view/www/src/types/log.d.ts +24 -6
  36. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
  37. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
  38. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
  39. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
  40. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
  41. inspect_ai/agent/_agent.py +12 -0
  42. inspect_ai/agent/_as_tool.py +1 -1
  43. inspect_ai/agent/_bridge/bridge.py +9 -2
  44. inspect_ai/agent/_react.py +142 -74
  45. inspect_ai/agent/_run.py +13 -2
  46. inspect_ai/agent/_types.py +6 -0
  47. inspect_ai/approval/_apply.py +6 -7
  48. inspect_ai/approval/_approver.py +3 -3
  49. inspect_ai/approval/_auto.py +2 -2
  50. inspect_ai/approval/_call.py +20 -4
  51. inspect_ai/approval/_human/approver.py +3 -3
  52. inspect_ai/approval/_human/manager.py +2 -2
  53. inspect_ai/approval/_human/panel.py +3 -3
  54. inspect_ai/approval/_policy.py +3 -3
  55. inspect_ai/log/__init__.py +2 -0
  56. inspect_ai/log/_log.py +23 -2
  57. inspect_ai/log/_model.py +58 -0
  58. inspect_ai/log/_recorders/file.py +14 -3
  59. inspect_ai/log/_transcript.py +3 -0
  60. inspect_ai/model/__init__.py +2 -0
  61. inspect_ai/model/_call_tools.py +4 -1
  62. inspect_ai/model/_model.py +49 -3
  63. inspect_ai/model/_openai.py +151 -21
  64. inspect_ai/model/_providers/anthropic.py +20 -12
  65. inspect_ai/model/_providers/bedrock.py +3 -3
  66. inspect_ai/model/_providers/cloudflare.py +29 -108
  67. inspect_ai/model/_providers/google.py +21 -10
  68. inspect_ai/model/_providers/grok.py +23 -17
  69. inspect_ai/model/_providers/groq.py +61 -37
  70. inspect_ai/model/_providers/llama_cpp_python.py +8 -9
  71. inspect_ai/model/_providers/mistral.py +8 -3
  72. inspect_ai/model/_providers/ollama.py +8 -9
  73. inspect_ai/model/_providers/openai.py +53 -157
  74. inspect_ai/model/_providers/openai_compatible.py +195 -0
  75. inspect_ai/model/_providers/openrouter.py +4 -15
  76. inspect_ai/model/_providers/providers.py +11 -0
  77. inspect_ai/model/_providers/together.py +25 -23
  78. inspect_ai/model/_trim.py +83 -0
  79. inspect_ai/solver/_plan.py +5 -3
  80. inspect_ai/tool/_tool_def.py +8 -2
  81. inspect_ai/util/__init__.py +3 -0
  82. inspect_ai/util/_concurrency.py +15 -2
  83. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/METADATA +1 -1
  84. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/RECORD +88 -83
  85. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/WHEEL +1 -1
  86. inspect_ai/_eval/task/rundir.py +0 -78
  87. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  88. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/entry_points.txt +0 -0
  89. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/licenses/LICENSE +0 -0
  90. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/top_level.txt +0 -0
@@ -38,6 +38,7 @@ CONSOLE_DISPLAY_WIDTH = 120
38
38
  BASE_64_DATA_REMOVED = "<base64-data-removed>"
39
39
  SANDBOX_SETUP_TIMEOUT = 300
40
40
  NO_CONTENT = "(no content)"
41
+ MODEL_NONE = "none/none"
41
42
 
42
43
  DESERIALIZING = "deserializing"
43
44
  DESERIALIZING_CONTEXT = {DESERIALIZING: True}
inspect_ai/_util/json.py CHANGED
@@ -93,9 +93,14 @@ def json_changes(
93
93
  replaced = before
94
94
  for path in paths:
95
95
  decoded_path = decode_json_pointer_segment(path)
96
- index: Any = (
97
- int(decoded_path) if decoded_path.isnumeric() else decoded_path
98
- )
96
+ if isinstance(replaced, list):
97
+ if not decoded_path.isnumeric():
98
+ raise ValueError(
99
+ f"Invalid JSON Pointer segment for list: {decoded_path}"
100
+ )
101
+ index = int(decoded_path)
102
+ else:
103
+ index = decoded_path
99
104
  replaced = replaced[index]
100
105
  json_change.replaced = replaced
101
106
  changes.append(json_change)
@@ -14,18 +14,24 @@ from .entrypoints import ensure_entry_points
14
14
  obj_type = type
15
15
 
16
16
  RegistryType = Literal[
17
- "modelapi",
18
17
  "task",
19
18
  "solver",
20
- "plan",
19
+ "agent",
20
+ "tool",
21
21
  "scorer",
22
22
  "metric",
23
- "tool",
24
- "agent",
25
- "sandboxenv",
26
23
  "score_reducer",
24
+ "modelapi",
25
+ "sandboxenv",
27
26
  "approver",
28
27
  ]
28
+ """Enumeration of registry object types.
29
+
30
+ These are the types of objects in this system that can be
31
+ registered using a decorator (e.g. `@task`, `@solver`).
32
+ Registered objects can in turn be created dynamically using
33
+ the `registry_create()` function.
34
+ """
29
35
 
30
36
 
31
37
  class RegistryInfo(BaseModel):
@@ -181,17 +187,28 @@ def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
181
187
  def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
182
188
  r"""Create a registry object.
183
189
 
184
- Registry objects can be ordinary functions that implement a protocol,
185
- factory functions that return a function based on **kwargs, or classes
186
- deriving that can be created using **kwargs
190
+ Creates objects registered via decorator (e.g. `@task`, `@solver`). Note
191
+ that this can also create registered objects within Python packages, in
192
+ which case the name of the package should be used as a prefix, e.g.
193
+
194
+ ```python
195
+ registry_create("scorer", "mypackage/myscorer", ...)
196
+ ```
197
+
198
+ Objects within the Inspect package do not require a prefix, nor do
199
+ objects from imported modules that aren't in a package.
187
200
 
188
201
  Args:
189
- type (RegistryType): Type of registry object to create
190
- name (str): Name of registry options to create
191
- **kwargs (Any): Optional creation arguments
202
+ type: Type of registry object to create
203
+ name: Name of registry object to create
204
+ **kwargs: Optional creation arguments
192
205
 
193
206
  Returns:
194
- Registry object with registry info attribute
207
+ Instance of specified name and type.
208
+
209
+ Raises:
210
+ LookupError: If the named object was not found in the registry.
211
+ TypeError: If the specified parameters are not valid for the object.
195
212
  """
196
213
  # lookup the object
197
214
  obj = registry_lookup(type, name)
@@ -225,7 +242,7 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
225
242
  else:
226
243
  return obj
227
244
  else:
228
- raise ValueError(f"{name} was not found in the registry")
245
+ raise LookupError(f"{name} was not found in the registry")
229
246
 
230
247
 
231
248
  def registry_info(o: object) -> RegistryInfo:
@@ -31,6 +31,7 @@
31
31
  --inspect-font-size-base: 0.9rem;
32
32
  --inspect-font-size-small: 0.8rem;
33
33
  --inspect-font-size-smaller: 0.8rem;
34
+ --inspect-font-size-smallest: 0.7rem;
34
35
 
35
36
  /* Inspect Glass */
36
37
  --inspect-glass-color: #000000;
@@ -113,6 +114,10 @@ body[class^="vscode-"] .app-main-grid {
113
114
  font-size: var(--inspect-font-size-smaller);
114
115
  }
115
116
 
117
+ .text-size-smallest {
118
+ font-size: var(--inspect-font-size-smallest);
119
+ }
120
+
116
121
  .text-truncate {
117
122
  white-space: nowrap;
118
123
  text-overflow: ellipsis;
@@ -14304,6 +14304,7 @@ pre[class*="language-"] {
14304
14304
  --inspect-font-size-base: 0.9rem;
14305
14305
  --inspect-font-size-small: 0.8rem;
14306
14306
  --inspect-font-size-smaller: 0.8rem;
14307
+ --inspect-font-size-smallest: 0.7rem;
14307
14308
 
14308
14309
  /* Inspect Glass */
14309
14310
  --inspect-glass-color: #000000;
@@ -14386,6 +14387,10 @@ body[class^="vscode-"] .app-main-grid {
14386
14387
  font-size: var(--inspect-font-size-smaller);
14387
14388
  }
14388
14389
 
14390
+ .text-size-smallest {
14391
+ font-size: var(--inspect-font-size-smallest);
14392
+ }
14393
+
14389
14394
  .text-truncate {
14390
14395
  white-space: nowrap;
14391
14396
  text-overflow: ellipsis;
@@ -15904,18 +15909,18 @@ ul.jsondiffpatch-textdiff {
15904
15909
  ._output_s17ok_1 {
15905
15910
  padding-top: 1em;
15906
15911
  }
15907
- ._outputPre_1h1fn_1 {
15912
+ ._outputPre_1t1y9_1 {
15908
15913
  padding: 0.5em;
15909
15914
  margin-top: 0.25em;
15910
15915
  margin-bottom: 1rem;
15911
15916
  }
15912
15917
 
15913
- ._outputCode_1h1fn_7 {
15914
- overflow-wrap: anywhere;
15915
- white-space: pre-wrap;
15918
+ ._outputCode_1t1y9_7 {
15919
+ overflow-wrap: anywhere !important;
15920
+ white-space: pre-wrap !important;
15916
15921
  }
15917
15922
 
15918
- ._bottomPadding_1h1fn_12 {
15923
+ ._bottomPadding_1t1y9_12 {
15919
15924
  padding-bottom: 1em;
15920
15925
  }
15921
15926
  ._image_10saa_1 {
@@ -15934,37 +15939,37 @@ ul.jsondiffpatch-textdiff {
15934
15939
  ._number_140x5_7 {
15935
15940
  margin-top: 0.1em;
15936
15941
  }
15937
- ._table_1memb_1 {
15942
+ ._table_9qith_1 {
15938
15943
  padding-left: 0;
15939
15944
  margin-left: 0;
15940
15945
  margin-bottom: 0.2rem;
15941
15946
  }
15942
15947
 
15943
- ._th_1memb_7 {
15948
+ ._th_9qith_7 {
15944
15949
  padding: 0;
15945
15950
  }
15946
15951
 
15947
- ._cell_1memb_11 {
15948
- padding: 0.3em 0.3em 0.3em 0em;
15952
+ ._cell_9qith_11 {
15953
+ padding: 0em 0.5em 0.3em 0em !important;
15949
15954
  }
15950
15955
 
15951
- ._compact_1memb_15 ._cell_1memb_11 {
15956
+ ._compact_9qith_15 ._cell_9qith_11 {
15952
15957
  padding: 0;
15953
15958
  }
15954
15959
 
15955
- ._cellKey_1memb_19 {
15960
+ ._cellKey_9qith_19 {
15956
15961
  font-weight: 400;
15957
15962
  padding-right: 1em;
15958
15963
  white-space: nowrap;
15959
15964
  }
15960
15965
 
15961
- ._compact_1memb_15 ._cellKey_1memb_19 {
15966
+ ._compact_9qith_15 ._cellKey_9qith_19 {
15962
15967
  font-weight: 400;
15963
15968
  padding-right: 0.2em;
15964
15969
  white-space: nowrap;
15965
15970
  }
15966
15971
 
15967
- ._cellValue_1memb_31 {
15972
+ ._cellValue_9qith_31 {
15968
15973
  font-weight: 300;
15969
15974
  white-space: pre-wrap;
15970
15975
  word-wrap: anywhere;
@@ -19331,53 +19336,51 @@ span.ap-marker-container:hover span.ap-marker {
19331
19336
  ._progress_1qsnv_12 {
19332
19337
  margin-left: 0.5em;
19333
19338
  }
19334
- ._darkenedBg_c8m1t_1 {
19339
+ ._darkenedBg_eq9th_1 {
19335
19340
  background-color: var(--bs-light-bg-subtle);
19336
19341
  }
19337
19342
 
19338
- ._normalBg_c8m1t_5 {
19343
+ ._normalBg_eq9th_5 {
19339
19344
  background-color: var(--bs-body-bg);
19340
19345
  }
19341
19346
 
19342
- ._node_c8m1t_9._first_c8m1t_9 {
19347
+ ._node_eq9th_9._first_eq9th_9 {
19343
19348
  padding-top: 0.5em;
19344
19349
  }
19345
19350
 
19346
- ._node_c8m1t_9 {
19351
+ ._node_eq9th_9 {
19347
19352
  padding-top: 0;
19348
19353
  padding-bottom: 0.5em;
19349
19354
  }
19350
19355
 
19351
- ._nodes_c8m1t_18 {
19356
+ ._nodes_eq9th_18 {
19352
19357
  width: 100%;
19353
19358
  margin-top: 1em;
19354
19359
  }
19355
19360
 
19356
- ._transcriptComponent_c8m1t_23 {
19361
+ ._transcriptComponent_eq9th_23 {
19357
19362
  display: grid;
19358
19363
  margin: 0.5em 0 0 0;
19359
19364
  width: 100%;
19360
19365
  }
19361
19366
 
19362
- ._eventNode_c8m1t_29 {
19367
+ ._eventNode_eq9th_29 {
19363
19368
  background-color: var(--bs-body-bg);
19364
- margin-bottom: 1.5em;
19365
- padding-bottom: 0.5em;
19366
19369
  }
19367
19370
 
19368
- ._eventNode_c8m1t_29._darkenBg_c8m1t_35 {
19371
+ ._eventNode_eq9th_29._darkenBg_eq9th_33 {
19369
19372
  background-color: var(--bs-light-bg-subtle);
19370
19373
  }
19371
19374
 
19372
- ._eventNode_c8m1t_29._lastNode_c8m1t_39 {
19375
+ ._eventNode_eq9th_29._lastNode_eq9th_37 {
19373
19376
  margin-bottom: 0;
19374
19377
  }
19375
19378
 
19376
- ._eventNodeContainer_c8m1t_43 {
19379
+ ._eventNodeContainer_eq9th_41 {
19377
19380
  padding-bottom: 0.5em;
19378
19381
  }
19379
19382
 
19380
- ._eventNodeContainer_c8m1t_43._noBottom_c8m1t_47 {
19383
+ ._eventNodeContainer_eq9th_41._noBottom_eq9th_45 {
19381
19384
  padding-bottom: 0;
19382
19385
  }
19383
19386
  ._darkenedBg_1ye6u_1 {
@@ -19645,6 +19648,22 @@ span.ap-marker-container:hover span.ap-marker {
19645
19648
  ._text_1yknn_20 {
19646
19649
  margin-top: -2px;
19647
19650
  }
19651
+ ._container_304w9_1 {
19652
+ display: grid;
19653
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
19654
+ row-gap: 2em;
19655
+ column-gap: 1em;
19656
+ }
19657
+
19658
+ ._modelInfo_304w9_8 {
19659
+ display: grid;
19660
+ grid-template-columns: max-content auto;
19661
+ column-gap: 1em;
19662
+ }
19663
+
19664
+ ._role_304w9_14 {
19665
+ grid-column: -1/1;
19666
+ }
19648
19667
  ._item_1uzhd_1 {
19649
19668
  margin-bottom: 0em;
19650
19669
  }
@@ -19808,55 +19827,71 @@ span.ap-marker-container:hover span.ap-marker {
19808
19827
  ._copyButton_1goi8_1:hover {
19809
19828
  opacity: 0.75;
19810
19829
  }
19811
- ._container_14b93_1 {
19830
+ ._container_q17yq_1 {
19831
+ display: flex;
19832
+ flex-direction: row;
19833
+ flex-wrap: wrap;
19834
+ gap: 0;
19835
+ margin-top: -0.2rem;
19836
+ margin-bottom: 0.2rem;
19837
+ }
19838
+
19839
+ ._grid_q17yq_10 {
19840
+ display: grid;
19841
+ grid-template-rows: repeat(auto-fill, minmax(10px, 1fr));
19842
+ grid-template-columns: 1fr;
19843
+ gap: 0.1em;
19844
+ padding-right: 1em;
19845
+ }
19846
+ ._container_291sb_1 {
19812
19847
  display: flex;
19813
19848
  padding-top: 0;
19814
19849
  margin-left: 0.5rem;
19815
19850
  min-width: 250px;
19816
19851
  }
19817
19852
 
19818
- ._wrapper_14b93_8 {
19853
+ ._wrapper_291sb_8 {
19819
19854
  display: grid;
19820
19855
  grid-template-columns: minmax(auto, 1fr) 1fr;
19821
19856
  width: 100%;
19822
19857
  }
19823
19858
 
19824
- ._toggle_14b93_14 {
19859
+ ._toggle_291sb_14 {
19825
19860
  padding: 0rem 0.1rem 0.1rem 0rem;
19826
19861
  display: flex;
19827
19862
  }
19828
19863
 
19829
- ._body_14b93_19 {
19864
+ ._body_291sb_19 {
19830
19865
  display: flex;
19831
19866
  flex-direction: column;
19832
19867
  margin-left: 0.2rem;
19833
19868
  }
19834
19869
 
19835
- ._bodyContainer_14b93_25 {
19870
+ ._bodyContainer_291sb_25 {
19836
19871
  margin-top: 0.1rem;
19837
19872
  display: grid;
19838
19873
  grid-template-columns: minmax(30px, max-content) minmax(100px, max-content);
19839
19874
  }
19840
19875
 
19841
- ._taskTitle_14b93_31 {
19876
+ ._taskTitle_291sb_31 {
19842
19877
  font-weight: 600;
19843
19878
  margin-right: 0.3rem;
19844
19879
  }
19845
19880
 
19846
- ._taskModel_14b93_36 {
19881
+ ._taskModel_291sb_36 {
19847
19882
  padding-top: 0.4rem;
19848
19883
  }
19849
19884
 
19850
- ._taskStatus_14b93_40 {
19885
+ ._taskStatus_291sb_40 {
19851
19886
  display: flex;
19852
19887
  justify-content: end;
19853
19888
  margin-right: 1em;
19854
19889
  margin-bottom: 0;
19855
19890
  }
19856
19891
 
19857
- ._secondaryContainer_14b93_47 {
19892
+ ._secondaryContainer_291sb_47 {
19858
19893
  opacity: 0.7;
19859
- margin-top: 0.1rem;
19894
+ margin-top: -0.1rem;
19860
19895
  padding-bottom: 0;
19861
19896
  display: grid;
19862
19897
  grid-template-columns: minmax(0, max-content) max-content;