inspect-ai 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +16 -0
- inspect_ai/_cli/score.py +1 -12
- inspect_ai/_cli/util.py +4 -2
- inspect_ai/_display/core/footer.py +2 -2
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_eval/context.py +7 -1
- inspect_ai/_eval/eval.py +51 -27
- inspect_ai/_eval/evalset.py +27 -10
- inspect_ai/_eval/loader.py +7 -8
- inspect_ai/_eval/run.py +23 -31
- inspect_ai/_eval/score.py +18 -1
- inspect_ai/_eval/task/log.py +5 -13
- inspect_ai/_eval/task/resolved.py +1 -0
- inspect_ai/_eval/task/run.py +231 -256
- inspect_ai/_eval/task/task.py +25 -2
- inspect_ai/_eval/task/util.py +1 -8
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/json.py +8 -3
- inspect_ai/_util/registry.py +30 -13
- inspect_ai/_view/www/App.css +5 -0
- inspect_ai/_view/www/dist/assets/index.css +71 -36
- inspect_ai/_view/www/dist/assets/index.js +573 -475
- inspect_ai/_view/www/log-schema.json +66 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
- inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
- inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -6
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +0 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
- inspect_ai/_view/www/src/types/log.d.ts +24 -6
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_as_tool.py +1 -1
- inspect_ai/agent/_bridge/bridge.py +9 -2
- inspect_ai/agent/_react.py +142 -74
- inspect_ai/agent/_run.py +13 -2
- inspect_ai/agent/_types.py +6 -0
- inspect_ai/approval/_apply.py +6 -7
- inspect_ai/approval/_approver.py +3 -3
- inspect_ai/approval/_auto.py +2 -2
- inspect_ai/approval/_call.py +20 -4
- inspect_ai/approval/_human/approver.py +3 -3
- inspect_ai/approval/_human/manager.py +2 -2
- inspect_ai/approval/_human/panel.py +3 -3
- inspect_ai/approval/_policy.py +3 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +23 -2
- inspect_ai/log/_model.py +58 -0
- inspect_ai/log/_recorders/file.py +14 -3
- inspect_ai/log/_transcript.py +3 -0
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +4 -1
- inspect_ai/model/_model.py +49 -3
- inspect_ai/model/_openai.py +151 -21
- inspect_ai/model/_providers/anthropic.py +20 -12
- inspect_ai/model/_providers/bedrock.py +3 -3
- inspect_ai/model/_providers/cloudflare.py +29 -108
- inspect_ai/model/_providers/google.py +21 -10
- inspect_ai/model/_providers/grok.py +23 -17
- inspect_ai/model/_providers/groq.py +61 -37
- inspect_ai/model/_providers/llama_cpp_python.py +8 -9
- inspect_ai/model/_providers/mistral.py +8 -3
- inspect_ai/model/_providers/ollama.py +8 -9
- inspect_ai/model/_providers/openai.py +53 -157
- inspect_ai/model/_providers/openai_compatible.py +195 -0
- inspect_ai/model/_providers/openrouter.py +4 -15
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/model/_providers/together.py +25 -23
- inspect_ai/model/_trim.py +83 -0
- inspect_ai/solver/_plan.py +5 -3
- inspect_ai/tool/_tool_def.py +8 -2
- inspect_ai/util/__init__.py +3 -0
- inspect_ai/util/_concurrency.py +15 -2
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/RECORD +88 -83
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/WHEEL +1 -1
- inspect_ai/_eval/task/rundir.py +0 -78
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/top_level.txt +0 -0
inspect_ai/_util/constants.py
CHANGED
inspect_ai/_util/json.py
CHANGED
@@ -93,9 +93,14 @@ def json_changes(
|
|
93
93
|
replaced = before
|
94
94
|
for path in paths:
|
95
95
|
decoded_path = decode_json_pointer_segment(path)
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
if isinstance(replaced, list):
|
97
|
+
if not decoded_path.isnumeric():
|
98
|
+
raise ValueError(
|
99
|
+
f"Invalid JSON Pointer segment for list: {decoded_path}"
|
100
|
+
)
|
101
|
+
index = int(decoded_path)
|
102
|
+
else:
|
103
|
+
index = decoded_path
|
99
104
|
replaced = replaced[index]
|
100
105
|
json_change.replaced = replaced
|
101
106
|
changes.append(json_change)
|
inspect_ai/_util/registry.py
CHANGED
@@ -14,18 +14,24 @@ from .entrypoints import ensure_entry_points
|
|
14
14
|
obj_type = type
|
15
15
|
|
16
16
|
RegistryType = Literal[
|
17
|
-
"modelapi",
|
18
17
|
"task",
|
19
18
|
"solver",
|
20
|
-
"
|
19
|
+
"agent",
|
20
|
+
"tool",
|
21
21
|
"scorer",
|
22
22
|
"metric",
|
23
|
-
"tool",
|
24
|
-
"agent",
|
25
|
-
"sandboxenv",
|
26
23
|
"score_reducer",
|
24
|
+
"modelapi",
|
25
|
+
"sandboxenv",
|
27
26
|
"approver",
|
28
27
|
]
|
28
|
+
"""Enumeration of registry object types.
|
29
|
+
|
30
|
+
These are the types of objects in this system that can be
|
31
|
+
registered using a decorator (e.g. `@task`, `@solver`).
|
32
|
+
Registered objects can in turn be created dynamically using
|
33
|
+
the `registry_create()` function.
|
34
|
+
"""
|
29
35
|
|
30
36
|
|
31
37
|
class RegistryInfo(BaseModel):
|
@@ -181,17 +187,28 @@ def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
|
|
181
187
|
def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
|
182
188
|
r"""Create a registry object.
|
183
189
|
|
184
|
-
|
185
|
-
|
186
|
-
|
190
|
+
Creates objects registered via decorator (e.g. `@task`, `@solver`). Note
|
191
|
+
that this can also create registered objects within Python packages, in
|
192
|
+
which case the name of the package should be used as a prefix, e.g.
|
193
|
+
|
194
|
+
```python
|
195
|
+
registry_create("scorer", "mypackage/myscorer", ...)
|
196
|
+
```
|
197
|
+
|
198
|
+
Objects within the Inspect package do not require a prefix, nor do
|
199
|
+
objects from imported modules that aren't in a package.
|
187
200
|
|
188
201
|
Args:
|
189
|
-
type
|
190
|
-
name
|
191
|
-
**kwargs
|
202
|
+
type: Type of registry object to create
|
203
|
+
name: Name of registry object to create
|
204
|
+
**kwargs: Optional creation arguments
|
192
205
|
|
193
206
|
Returns:
|
194
|
-
|
207
|
+
Instance of specified name and type.
|
208
|
+
|
209
|
+
Raises:
|
210
|
+
LookupError: If the named object was not found in the registry.
|
211
|
+
TypeError: If the specified parameters are not valid for the object.
|
195
212
|
"""
|
196
213
|
# lookup the object
|
197
214
|
obj = registry_lookup(type, name)
|
@@ -225,7 +242,7 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
|
|
225
242
|
else:
|
226
243
|
return obj
|
227
244
|
else:
|
228
|
-
raise
|
245
|
+
raise LookupError(f"{name} was not found in the registry")
|
229
246
|
|
230
247
|
|
231
248
|
def registry_info(o: object) -> RegistryInfo:
|
inspect_ai/_view/www/App.css
CHANGED
@@ -31,6 +31,7 @@
|
|
31
31
|
--inspect-font-size-base: 0.9rem;
|
32
32
|
--inspect-font-size-small: 0.8rem;
|
33
33
|
--inspect-font-size-smaller: 0.8rem;
|
34
|
+
--inspect-font-size-smallest: 0.7rem;
|
34
35
|
|
35
36
|
/* Inspect Glass */
|
36
37
|
--inspect-glass-color: #000000;
|
@@ -113,6 +114,10 @@ body[class^="vscode-"] .app-main-grid {
|
|
113
114
|
font-size: var(--inspect-font-size-smaller);
|
114
115
|
}
|
115
116
|
|
117
|
+
.text-size-smallest {
|
118
|
+
font-size: var(--inspect-font-size-smallest);
|
119
|
+
}
|
120
|
+
|
116
121
|
.text-truncate {
|
117
122
|
white-space: nowrap;
|
118
123
|
text-overflow: ellipsis;
|
@@ -14304,6 +14304,7 @@ pre[class*="language-"] {
|
|
14304
14304
|
--inspect-font-size-base: 0.9rem;
|
14305
14305
|
--inspect-font-size-small: 0.8rem;
|
14306
14306
|
--inspect-font-size-smaller: 0.8rem;
|
14307
|
+
--inspect-font-size-smallest: 0.7rem;
|
14307
14308
|
|
14308
14309
|
/* Inspect Glass */
|
14309
14310
|
--inspect-glass-color: #000000;
|
@@ -14386,6 +14387,10 @@ body[class^="vscode-"] .app-main-grid {
|
|
14386
14387
|
font-size: var(--inspect-font-size-smaller);
|
14387
14388
|
}
|
14388
14389
|
|
14390
|
+
.text-size-smallest {
|
14391
|
+
font-size: var(--inspect-font-size-smallest);
|
14392
|
+
}
|
14393
|
+
|
14389
14394
|
.text-truncate {
|
14390
14395
|
white-space: nowrap;
|
14391
14396
|
text-overflow: ellipsis;
|
@@ -15904,18 +15909,18 @@ ul.jsondiffpatch-textdiff {
|
|
15904
15909
|
._output_s17ok_1 {
|
15905
15910
|
padding-top: 1em;
|
15906
15911
|
}
|
15907
|
-
.
|
15912
|
+
._outputPre_1t1y9_1 {
|
15908
15913
|
padding: 0.5em;
|
15909
15914
|
margin-top: 0.25em;
|
15910
15915
|
margin-bottom: 1rem;
|
15911
15916
|
}
|
15912
15917
|
|
15913
|
-
.
|
15914
|
-
overflow-wrap: anywhere;
|
15915
|
-
white-space: pre-wrap;
|
15918
|
+
._outputCode_1t1y9_7 {
|
15919
|
+
overflow-wrap: anywhere !important;
|
15920
|
+
white-space: pre-wrap !important;
|
15916
15921
|
}
|
15917
15922
|
|
15918
|
-
.
|
15923
|
+
._bottomPadding_1t1y9_12 {
|
15919
15924
|
padding-bottom: 1em;
|
15920
15925
|
}
|
15921
15926
|
._image_10saa_1 {
|
@@ -15934,37 +15939,37 @@ ul.jsondiffpatch-textdiff {
|
|
15934
15939
|
._number_140x5_7 {
|
15935
15940
|
margin-top: 0.1em;
|
15936
15941
|
}
|
15937
|
-
.
|
15942
|
+
._table_9qith_1 {
|
15938
15943
|
padding-left: 0;
|
15939
15944
|
margin-left: 0;
|
15940
15945
|
margin-bottom: 0.2rem;
|
15941
15946
|
}
|
15942
15947
|
|
15943
|
-
.
|
15948
|
+
._th_9qith_7 {
|
15944
15949
|
padding: 0;
|
15945
15950
|
}
|
15946
15951
|
|
15947
|
-
.
|
15948
|
-
padding:
|
15952
|
+
._cell_9qith_11 {
|
15953
|
+
padding: 0em 0.5em 0.3em 0em !important;
|
15949
15954
|
}
|
15950
15955
|
|
15951
|
-
.
|
15956
|
+
._compact_9qith_15 ._cell_9qith_11 {
|
15952
15957
|
padding: 0;
|
15953
15958
|
}
|
15954
15959
|
|
15955
|
-
.
|
15960
|
+
._cellKey_9qith_19 {
|
15956
15961
|
font-weight: 400;
|
15957
15962
|
padding-right: 1em;
|
15958
15963
|
white-space: nowrap;
|
15959
15964
|
}
|
15960
15965
|
|
15961
|
-
.
|
15966
|
+
._compact_9qith_15 ._cellKey_9qith_19 {
|
15962
15967
|
font-weight: 400;
|
15963
15968
|
padding-right: 0.2em;
|
15964
15969
|
white-space: nowrap;
|
15965
15970
|
}
|
15966
15971
|
|
15967
|
-
.
|
15972
|
+
._cellValue_9qith_31 {
|
15968
15973
|
font-weight: 300;
|
15969
15974
|
white-space: pre-wrap;
|
15970
15975
|
word-wrap: anywhere;
|
@@ -19331,53 +19336,51 @@ span.ap-marker-container:hover span.ap-marker {
|
|
19331
19336
|
._progress_1qsnv_12 {
|
19332
19337
|
margin-left: 0.5em;
|
19333
19338
|
}
|
19334
|
-
.
|
19339
|
+
._darkenedBg_eq9th_1 {
|
19335
19340
|
background-color: var(--bs-light-bg-subtle);
|
19336
19341
|
}
|
19337
19342
|
|
19338
|
-
.
|
19343
|
+
._normalBg_eq9th_5 {
|
19339
19344
|
background-color: var(--bs-body-bg);
|
19340
19345
|
}
|
19341
19346
|
|
19342
|
-
.
|
19347
|
+
._node_eq9th_9._first_eq9th_9 {
|
19343
19348
|
padding-top: 0.5em;
|
19344
19349
|
}
|
19345
19350
|
|
19346
|
-
.
|
19351
|
+
._node_eq9th_9 {
|
19347
19352
|
padding-top: 0;
|
19348
19353
|
padding-bottom: 0.5em;
|
19349
19354
|
}
|
19350
19355
|
|
19351
|
-
.
|
19356
|
+
._nodes_eq9th_18 {
|
19352
19357
|
width: 100%;
|
19353
19358
|
margin-top: 1em;
|
19354
19359
|
}
|
19355
19360
|
|
19356
|
-
.
|
19361
|
+
._transcriptComponent_eq9th_23 {
|
19357
19362
|
display: grid;
|
19358
19363
|
margin: 0.5em 0 0 0;
|
19359
19364
|
width: 100%;
|
19360
19365
|
}
|
19361
19366
|
|
19362
|
-
.
|
19367
|
+
._eventNode_eq9th_29 {
|
19363
19368
|
background-color: var(--bs-body-bg);
|
19364
|
-
margin-bottom: 1.5em;
|
19365
|
-
padding-bottom: 0.5em;
|
19366
19369
|
}
|
19367
19370
|
|
19368
|
-
.
|
19371
|
+
._eventNode_eq9th_29._darkenBg_eq9th_33 {
|
19369
19372
|
background-color: var(--bs-light-bg-subtle);
|
19370
19373
|
}
|
19371
19374
|
|
19372
|
-
.
|
19375
|
+
._eventNode_eq9th_29._lastNode_eq9th_37 {
|
19373
19376
|
margin-bottom: 0;
|
19374
19377
|
}
|
19375
19378
|
|
19376
|
-
.
|
19379
|
+
._eventNodeContainer_eq9th_41 {
|
19377
19380
|
padding-bottom: 0.5em;
|
19378
19381
|
}
|
19379
19382
|
|
19380
|
-
.
|
19383
|
+
._eventNodeContainer_eq9th_41._noBottom_eq9th_45 {
|
19381
19384
|
padding-bottom: 0;
|
19382
19385
|
}
|
19383
19386
|
._darkenedBg_1ye6u_1 {
|
@@ -19645,6 +19648,22 @@ span.ap-marker-container:hover span.ap-marker {
|
|
19645
19648
|
._text_1yknn_20 {
|
19646
19649
|
margin-top: -2px;
|
19647
19650
|
}
|
19651
|
+
._container_304w9_1 {
|
19652
|
+
display: grid;
|
19653
|
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
19654
|
+
row-gap: 2em;
|
19655
|
+
column-gap: 1em;
|
19656
|
+
}
|
19657
|
+
|
19658
|
+
._modelInfo_304w9_8 {
|
19659
|
+
display: grid;
|
19660
|
+
grid-template-columns: max-content auto;
|
19661
|
+
column-gap: 1em;
|
19662
|
+
}
|
19663
|
+
|
19664
|
+
._role_304w9_14 {
|
19665
|
+
grid-column: -1/1;
|
19666
|
+
}
|
19648
19667
|
._item_1uzhd_1 {
|
19649
19668
|
margin-bottom: 0em;
|
19650
19669
|
}
|
@@ -19808,55 +19827,71 @@ span.ap-marker-container:hover span.ap-marker {
|
|
19808
19827
|
._copyButton_1goi8_1:hover {
|
19809
19828
|
opacity: 0.75;
|
19810
19829
|
}
|
19811
|
-
.
|
19830
|
+
._container_q17yq_1 {
|
19831
|
+
display: flex;
|
19832
|
+
flex-direction: row;
|
19833
|
+
flex-wrap: wrap;
|
19834
|
+
gap: 0;
|
19835
|
+
margin-top: -0.2rem;
|
19836
|
+
margin-bottom: 0.2rem;
|
19837
|
+
}
|
19838
|
+
|
19839
|
+
._grid_q17yq_10 {
|
19840
|
+
display: grid;
|
19841
|
+
grid-template-rows: repeat(auto-fill, minmax(10px, 1fr));
|
19842
|
+
grid-template-columns: 1fr;
|
19843
|
+
gap: 0.1em;
|
19844
|
+
padding-right: 1em;
|
19845
|
+
}
|
19846
|
+
._container_291sb_1 {
|
19812
19847
|
display: flex;
|
19813
19848
|
padding-top: 0;
|
19814
19849
|
margin-left: 0.5rem;
|
19815
19850
|
min-width: 250px;
|
19816
19851
|
}
|
19817
19852
|
|
19818
|
-
.
|
19853
|
+
._wrapper_291sb_8 {
|
19819
19854
|
display: grid;
|
19820
19855
|
grid-template-columns: minmax(auto, 1fr) 1fr;
|
19821
19856
|
width: 100%;
|
19822
19857
|
}
|
19823
19858
|
|
19824
|
-
.
|
19859
|
+
._toggle_291sb_14 {
|
19825
19860
|
padding: 0rem 0.1rem 0.1rem 0rem;
|
19826
19861
|
display: flex;
|
19827
19862
|
}
|
19828
19863
|
|
19829
|
-
.
|
19864
|
+
._body_291sb_19 {
|
19830
19865
|
display: flex;
|
19831
19866
|
flex-direction: column;
|
19832
19867
|
margin-left: 0.2rem;
|
19833
19868
|
}
|
19834
19869
|
|
19835
|
-
.
|
19870
|
+
._bodyContainer_291sb_25 {
|
19836
19871
|
margin-top: 0.1rem;
|
19837
19872
|
display: grid;
|
19838
19873
|
grid-template-columns: minmax(30px, max-content) minmax(100px, max-content);
|
19839
19874
|
}
|
19840
19875
|
|
19841
|
-
.
|
19876
|
+
._taskTitle_291sb_31 {
|
19842
19877
|
font-weight: 600;
|
19843
19878
|
margin-right: 0.3rem;
|
19844
19879
|
}
|
19845
19880
|
|
19846
|
-
.
|
19881
|
+
._taskModel_291sb_36 {
|
19847
19882
|
padding-top: 0.4rem;
|
19848
19883
|
}
|
19849
19884
|
|
19850
|
-
.
|
19885
|
+
._taskStatus_291sb_40 {
|
19851
19886
|
display: flex;
|
19852
19887
|
justify-content: end;
|
19853
19888
|
margin-right: 1em;
|
19854
19889
|
margin-bottom: 0;
|
19855
19890
|
}
|
19856
19891
|
|
19857
|
-
.
|
19892
|
+
._secondaryContainer_291sb_47 {
|
19858
19893
|
opacity: 0.7;
|
19859
|
-
margin-top: 0.1rem;
|
19894
|
+
margin-top: -0.1rem;
|
19860
19895
|
padding-bottom: 0;
|
19861
19896
|
display: grid;
|
19862
19897
|
grid-template-columns: minmax(0, max-content) max-content;
|