inspect-ai 0.3.72__py3-none-any.whl → 0.3.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. inspect_ai/_cli/eval.py +14 -3
  2. inspect_ai/_cli/sandbox.py +3 -3
  3. inspect_ai/_cli/score.py +6 -4
  4. inspect_ai/_cli/trace.py +53 -6
  5. inspect_ai/_display/core/config.py +1 -1
  6. inspect_ai/_display/core/display.py +2 -1
  7. inspect_ai/_display/core/footer.py +6 -6
  8. inspect_ai/_display/plain/display.py +11 -6
  9. inspect_ai/_display/rich/display.py +23 -13
  10. inspect_ai/_display/textual/app.py +10 -9
  11. inspect_ai/_display/textual/display.py +2 -2
  12. inspect_ai/_display/textual/widgets/footer.py +4 -0
  13. inspect_ai/_display/textual/widgets/samples.py +14 -5
  14. inspect_ai/_eval/context.py +1 -2
  15. inspect_ai/_eval/eval.py +54 -41
  16. inspect_ai/_eval/loader.py +9 -2
  17. inspect_ai/_eval/run.py +148 -81
  18. inspect_ai/_eval/score.py +13 -8
  19. inspect_ai/_eval/task/images.py +31 -21
  20. inspect_ai/_eval/task/run.py +62 -59
  21. inspect_ai/_eval/task/rundir.py +16 -9
  22. inspect_ai/_eval/task/sandbox.py +7 -8
  23. inspect_ai/_eval/task/util.py +7 -0
  24. inspect_ai/_util/_async.py +118 -10
  25. inspect_ai/_util/constants.py +0 -2
  26. inspect_ai/_util/file.py +15 -29
  27. inspect_ai/_util/future.py +37 -0
  28. inspect_ai/_util/http.py +3 -99
  29. inspect_ai/_util/httpx.py +60 -0
  30. inspect_ai/_util/interrupt.py +2 -2
  31. inspect_ai/_util/json.py +5 -52
  32. inspect_ai/_util/logger.py +30 -86
  33. inspect_ai/_util/retry.py +10 -61
  34. inspect_ai/_util/trace.py +2 -2
  35. inspect_ai/_view/server.py +86 -3
  36. inspect_ai/_view/www/dist/assets/index.js +25837 -13269
  37. inspect_ai/_view/www/log-schema.json +253 -186
  38. inspect_ai/_view/www/package.json +2 -2
  39. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
  40. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
  41. inspect_ai/_view/www/src/types/log.d.ts +122 -94
  42. inspect_ai/approval/_human/manager.py +6 -10
  43. inspect_ai/approval/_human/panel.py +2 -2
  44. inspect_ai/dataset/_sources/util.py +7 -6
  45. inspect_ai/log/__init__.py +4 -0
  46. inspect_ai/log/_file.py +35 -61
  47. inspect_ai/log/_log.py +18 -1
  48. inspect_ai/log/_recorders/eval.py +14 -23
  49. inspect_ai/log/_recorders/json.py +3 -18
  50. inspect_ai/log/_samples.py +27 -2
  51. inspect_ai/log/_transcript.py +8 -8
  52. inspect_ai/model/__init__.py +2 -1
  53. inspect_ai/model/_call_tools.py +60 -40
  54. inspect_ai/model/_chat_message.py +3 -2
  55. inspect_ai/model/_generate_config.py +25 -0
  56. inspect_ai/model/_model.py +74 -36
  57. inspect_ai/model/_openai.py +9 -1
  58. inspect_ai/model/_providers/anthropic.py +24 -26
  59. inspect_ai/model/_providers/azureai.py +11 -9
  60. inspect_ai/model/_providers/bedrock.py +33 -24
  61. inspect_ai/model/_providers/cloudflare.py +8 -9
  62. inspect_ai/model/_providers/goodfire.py +7 -3
  63. inspect_ai/model/_providers/google.py +47 -13
  64. inspect_ai/model/_providers/groq.py +15 -15
  65. inspect_ai/model/_providers/hf.py +24 -17
  66. inspect_ai/model/_providers/mistral.py +36 -20
  67. inspect_ai/model/_providers/openai.py +30 -25
  68. inspect_ai/model/_providers/openai_o1.py +1 -1
  69. inspect_ai/model/_providers/providers.py +1 -1
  70. inspect_ai/model/_providers/together.py +3 -4
  71. inspect_ai/model/_providers/util/__init__.py +2 -2
  72. inspect_ai/model/_providers/util/chatapi.py +6 -19
  73. inspect_ai/model/_providers/util/hooks.py +165 -0
  74. inspect_ai/model/_providers/vertex.py +20 -3
  75. inspect_ai/model/_providers/vllm.py +16 -19
  76. inspect_ai/scorer/_multi.py +5 -2
  77. inspect_ai/solver/_bridge/patch.py +31 -1
  78. inspect_ai/solver/_fork.py +5 -3
  79. inspect_ai/solver/_human_agent/agent.py +3 -2
  80. inspect_ai/tool/__init__.py +8 -2
  81. inspect_ai/tool/_tool_info.py +4 -90
  82. inspect_ai/tool/_tool_params.py +4 -34
  83. inspect_ai/tool/_tools/_web_search.py +30 -24
  84. inspect_ai/util/__init__.py +4 -0
  85. inspect_ai/util/_concurrency.py +5 -6
  86. inspect_ai/util/_display.py +6 -0
  87. inspect_ai/util/_json.py +170 -0
  88. inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
  89. inspect_ai/util/_sandbox/docker/docker.py +5 -0
  90. inspect_ai/util/_sandbox/environment.py +56 -9
  91. inspect_ai/util/_sandbox/service.py +12 -5
  92. inspect_ai/util/_subprocess.py +94 -113
  93. inspect_ai/util/_subtask.py +2 -4
  94. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
  95. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +99 -99
  96. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
  97. inspect_ai/_util/timeouts.py +0 -160
  98. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  99. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  100. inspect_ai/model/_providers/util/tracker.py +0 -92
  101. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
  102. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
  103. {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
@@ -21,11 +21,7 @@ export type Location = string | null;
21
21
  export type Samples = number | null;
22
22
  export type SampleIds = (number | string)[] | null;
23
23
  export type Shuffled = boolean | null;
24
- /**
25
- * @minItems 1
26
- * @maxItems 2
27
- */
28
- export type SandboxEnvironmentSpec = [unknown] | [unknown, unknown];
24
+ export type Type = string;
29
25
  export type Model = string;
30
26
  export type ModelBaseUrl = string | null;
31
27
  export type Limit = number | [unknown, unknown] | null;
@@ -49,7 +45,7 @@ export type LogSamples = boolean | null;
49
45
  export type LogImages = boolean | null;
50
46
  export type LogBuffer = number | null;
51
47
  export type ScoreDisplay = boolean | null;
52
- export type Type = "git";
48
+ export type Type1 = "git";
53
49
  export type Origin = string;
54
50
  export type Commit = string;
55
51
  export type Metadata = {} | null;
@@ -105,12 +101,26 @@ export type CachePrompt = "auto" | boolean | null;
105
101
  export type ReasoningEffort = ("low" | "medium" | "high") | null;
106
102
  export type ReasoningTokens = number | null;
107
103
  export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
104
+ export type Name5 = string;
105
+ export type Type2 =
106
+ | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
107
+ | null;
108
+ export type Description = string | null;
109
+ export type Enum = unknown[] | null;
110
+ export type Properties = {
111
+ [k: string]: JSONSchema;
112
+ } | null;
113
+ export type Additionalproperties = JSONSchema | boolean | null;
114
+ export type Anyof = JSONSchema[] | null;
115
+ export type Required = string[] | null;
116
+ export type Description1 = string | null;
117
+ export type Strict = boolean | null;
108
118
  export type TotalSamples = number;
109
119
  export type CompletedSamples = number;
110
- export type Name5 = string;
120
+ export type Name6 = string;
111
121
  export type Scorer = string;
112
122
  export type Reducer = string | null;
113
- export type Name6 = string;
123
+ export type Name7 = string;
114
124
  export type Value = number;
115
125
  export type Metadata2 = {} | null;
116
126
  export type Metadata3 = {} | null;
@@ -138,7 +148,7 @@ export type Input =
138
148
  | ChatMessageAssistant
139
149
  | ChatMessageTool
140
150
  )[];
141
- export type Role = "system";
151
+ export type Id1 = string;
142
152
  export type Content =
143
153
  | string
144
154
  | (
@@ -148,23 +158,24 @@ export type Content =
148
158
  | ContentAudio
149
159
  | ContentVideo
150
160
  )[];
151
- export type Type1 = "text";
161
+ export type Type3 = "text";
152
162
  export type Text = string;
153
- export type Type2 = "reasoning";
163
+ export type Type4 = "reasoning";
154
164
  export type Reasoning = string;
155
165
  export type Signature = string | null;
156
166
  export type Redacted = boolean;
157
- export type Type3 = "image";
167
+ export type Type5 = "image";
158
168
  export type Image = string;
159
169
  export type Detail = "auto" | "low" | "high";
160
- export type Type4 = "audio";
170
+ export type Type6 = "audio";
161
171
  export type Audio = string;
162
172
  export type Format = "wav" | "mp3";
163
- export type Type5 = "video";
173
+ export type Type7 = "video";
164
174
  export type Video = string;
165
175
  export type Format1 = "mp4" | "mpeg" | "mov";
166
176
  export type Source = ("input" | "generate") | null;
167
- export type Role1 = "user";
177
+ export type Role = "system";
178
+ export type Id2 = string;
168
179
  export type Content1 =
169
180
  | string
170
181
  | (
@@ -175,8 +186,9 @@ export type Content1 =
175
186
  | ContentVideo
176
187
  )[];
177
188
  export type Source1 = ("input" | "generate") | null;
189
+ export type Role1 = "user";
178
190
  export type ToolCallId = string[] | null;
179
- export type Role2 = "assistant";
191
+ export type Id3 = string;
180
192
  export type Content2 =
181
193
  | string
182
194
  | (
@@ -187,15 +199,16 @@ export type Content2 =
187
199
  | ContentVideo
188
200
  )[];
189
201
  export type Source2 = ("input" | "generate") | null;
202
+ export type Role2 = "assistant";
190
203
  export type ToolCalls = ToolCall[] | null;
191
- export type Id1 = string;
204
+ export type Id4 = string;
192
205
  export type Function = string;
193
- export type Type6 = "function";
206
+ export type Type8 = "function";
194
207
  export type ParseError = string | null;
195
208
  export type Title = string | null;
196
209
  export type Format2 = "text" | "markdown";
197
210
  export type Content3 = string;
198
- export type Role3 = "tool";
211
+ export type Id5 = string;
199
212
  export type Content4 =
200
213
  | string
201
214
  | (
@@ -206,9 +219,10 @@ export type Content4 =
206
219
  | ContentVideo
207
220
  )[];
208
221
  export type Source3 = ("input" | "generate") | null;
222
+ export type Role3 = "tool";
209
223
  export type ToolCallId1 = string | null;
210
224
  export type Function1 = string | null;
211
- export type Type7 =
225
+ export type Type9 =
212
226
  | "parsing"
213
227
  | "timeout"
214
228
  | "unicode_decode"
@@ -277,7 +291,7 @@ export type Input1 =
277
291
  )[];
278
292
  export type Choices2 = string[] | null;
279
293
  export type Target1 = string | string[];
280
- export type Id2 = number | string | null;
294
+ export type Id6 = number | string | null;
281
295
  export type Metadata8 = {} | null;
282
296
  export type Files1 = {
283
297
  [k: string]: string;
@@ -288,7 +302,7 @@ export type Timestamp1 = string;
288
302
  export type WorkingStart1 = number;
289
303
  export type Pending1 = boolean | null;
290
304
  export type Event1 = "sample_limit";
291
- export type Type8 =
305
+ export type Type10 =
292
306
  | "message"
293
307
  | "time"
294
308
  | "working"
@@ -335,25 +349,14 @@ export type Input3 = (
335
349
  | ChatMessageAssistant
336
350
  | ChatMessageTool
337
351
  )[];
338
- export type Name7 = string;
339
- export type Description = string;
340
- export type Type9 = "object";
341
- export type Type10 =
342
- | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
343
- | null;
344
- export type Description1 = string | null;
345
- export type Enum = unknown[] | null;
346
- export type Properties1 = {
347
- [k: string]: ToolParam;
348
- } | null;
349
- export type Additionalproperties = ToolParam | boolean | null;
350
- export type Anyof = ToolParam[] | null;
351
- export type Required = string[] | null;
352
+ export type Name8 = string;
353
+ export type Description2 = string;
354
+ export type Type11 = "object";
352
355
  export type Required1 = string[];
353
356
  export type Additionalproperties1 = boolean;
354
357
  export type Tools1 = ToolInfo[];
355
358
  export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
356
- export type Name8 = string;
359
+ export type Name9 = string;
357
360
  export type Error1 = string | null;
358
361
  export type Cache = ("read" | "write") | null;
359
362
  export type Time1 = number | null;
@@ -363,8 +366,8 @@ export type Timestamp6 = string;
363
366
  export type WorkingStart6 = number;
364
367
  export type Pending6 = boolean | null;
365
368
  export type Event6 = "tool";
366
- export type Type11 = "function";
367
- export type Id3 = string;
369
+ export type Type12 = "function";
370
+ export type Id7 = string;
368
371
  export type Function2 = string;
369
372
  export type Result1 =
370
373
  | string
@@ -416,7 +419,7 @@ export type Timestamp11 = string;
416
419
  export type WorkingStart11 = number;
417
420
  export type Pending11 = boolean | null;
418
421
  export type Event11 = "logger";
419
- export type Name9 = string | null;
422
+ export type Name10 = string | null;
420
423
  export type Level =
421
424
  | "debug"
422
425
  | "trace"
@@ -441,14 +444,14 @@ export type WorkingStart13 = number;
441
444
  export type Pending13 = boolean | null;
442
445
  export type Event13 = "step";
443
446
  export type Action1 = "begin" | "end";
444
- export type Type12 = string | null;
445
- export type Name10 = string;
447
+ export type Type13 = string | null;
448
+ export type Name11 = string;
446
449
  export type Timestamp14 = string;
447
450
  export type WorkingStart14 = number;
448
451
  export type Pending14 = boolean | null;
449
452
  export type Event14 = "subtask";
450
- export type Name11 = string;
451
- export type Type13 = string | null;
453
+ export type Name12 = string;
454
+ export type Type14 = string | null;
452
455
  export type Events2 = (
453
456
  | SampleInitEvent
454
457
  | SampleLimitEvent
@@ -507,7 +510,7 @@ export type Events = (
507
510
  export type TotalTime = number | null;
508
511
  export type WorkingTime3 = number | null;
509
512
  export type Uuid = string | null;
510
- export type Type14 =
513
+ export type Type15 =
511
514
  | "context"
512
515
  | "time"
513
516
  | "working"
@@ -588,6 +591,16 @@ export interface EvalDataset {
588
591
  sample_ids: SampleIds;
589
592
  shuffled: Shuffled;
590
593
  }
594
+ /**
595
+ * Specification of a SandboxEnvironment.
596
+ */
597
+ export interface SandboxEnvironmentSpec {
598
+ type: Type;
599
+ config: Config;
600
+ }
601
+ export interface Config {
602
+ [k: string]: unknown;
603
+ }
591
604
  export interface ModelArgs {}
592
605
  /**
593
606
  * Configuration used for evaluation.
@@ -642,7 +655,7 @@ export interface Params {}
642
655
  * Git revision for evaluation.
643
656
  */
644
657
  export interface EvalRevision {
645
- type: Type;
658
+ type: Type1;
646
659
  origin: Origin;
647
660
  commit: Commit;
648
661
  }
@@ -704,6 +717,33 @@ export interface GenerateConfig {
704
717
  reasoning_effort: ReasoningEffort;
705
718
  reasoning_tokens: ReasoningTokens;
706
719
  reasoning_history: ReasoningHistory;
720
+ response_schema: ResponseSchema | null;
721
+ }
722
+ /**
723
+ * Schema for model response when using Structured Output.
724
+ */
725
+ export interface ResponseSchema {
726
+ name: Name5;
727
+ json_schema: JSONSchema;
728
+ description: Description1;
729
+ strict: Strict;
730
+ }
731
+ /**
732
+ * JSON Schema for type.
733
+ */
734
+ export interface JSONSchema {
735
+ type: Type2;
736
+ description: Description;
737
+ default: Default;
738
+ enum: Enum;
739
+ items: JSONSchema | null;
740
+ properties: Properties;
741
+ additionalProperties: Additionalproperties;
742
+ anyOf: Anyof;
743
+ required: Required;
744
+ }
745
+ export interface Default {
746
+ [k: string]: unknown;
707
747
  }
708
748
  /**
709
749
  * Scoring results from evaluation.
@@ -718,7 +758,7 @@ export interface EvalResults {
718
758
  * Score for evaluation task.
719
759
  */
720
760
  export interface EvalScore {
721
- name: Name5;
761
+ name: Name6;
722
762
  scorer: Scorer;
723
763
  reducer: Reducer;
724
764
  params: Params2;
@@ -733,7 +773,7 @@ export interface Metrics2 {
733
773
  * Metric for evaluation score.
734
774
  */
735
775
  export interface EvalMetric {
736
- name: Name6;
776
+ name: Name7;
737
777
  value: Value;
738
778
  params: Params3;
739
779
  metadata: Metadata2;
@@ -799,15 +839,16 @@ export interface EvalSample {
799
839
  * System chat message.
800
840
  */
801
841
  export interface ChatMessageSystem {
802
- role: Role;
842
+ id: Id1;
803
843
  content: Content;
804
844
  source: Source;
845
+ role: Role;
805
846
  }
806
847
  /**
807
848
  * Text content.
808
849
  */
809
850
  export interface ContentText {
810
- type: Type1;
851
+ type: Type3;
811
852
  text: Text;
812
853
  }
813
854
  /**
@@ -816,7 +857,7 @@ export interface ContentText {
816
857
  * See the specification for [thinking blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks) for Claude models.
817
858
  */
818
859
  export interface ContentReasoning {
819
- type: Type2;
860
+ type: Type4;
820
861
  reasoning: Reasoning;
821
862
  signature: Signature;
822
863
  redacted: Redacted;
@@ -825,7 +866,7 @@ export interface ContentReasoning {
825
866
  * Image content.
826
867
  */
827
868
  export interface ContentImage {
828
- type: Type3;
869
+ type: Type5;
829
870
  image: Image;
830
871
  detail: Detail;
831
872
  }
@@ -833,7 +874,7 @@ export interface ContentImage {
833
874
  * Audio content.
834
875
  */
835
876
  export interface ContentAudio {
836
- type: Type4;
877
+ type: Type6;
837
878
  audio: Audio;
838
879
  format: Format;
839
880
  }
@@ -841,7 +882,7 @@ export interface ContentAudio {
841
882
  * Video content.
842
883
  */
843
884
  export interface ContentVideo {
844
- type: Type5;
885
+ type: Type7;
845
886
  video: Video;
846
887
  format: Format1;
847
888
  }
@@ -849,25 +890,27 @@ export interface ContentVideo {
849
890
  * User chat message.
850
891
  */
851
892
  export interface ChatMessageUser {
852
- role: Role1;
893
+ id: Id2;
853
894
  content: Content1;
854
895
  source: Source1;
896
+ role: Role1;
855
897
  tool_call_id: ToolCallId;
856
898
  }
857
899
  /**
858
900
  * Assistant chat message.
859
901
  */
860
902
  export interface ChatMessageAssistant {
861
- role: Role2;
903
+ id: Id3;
862
904
  content: Content2;
863
905
  source: Source2;
906
+ role: Role2;
864
907
  tool_calls: ToolCalls;
865
908
  }
866
909
  export interface ToolCall {
867
- id: Id1;
910
+ id: Id4;
868
911
  function: Function;
869
912
  arguments: Arguments;
870
- type: Type6;
913
+ type: Type8;
871
914
  parse_error: ParseError;
872
915
  view: ToolCallContent | null;
873
916
  }
@@ -884,15 +927,16 @@ export interface ToolCallContent {
884
927
  * Tool chat message.
885
928
  */
886
929
  export interface ChatMessageTool {
887
- role: Role3;
930
+ id: Id5;
888
931
  content: Content4;
889
932
  source: Source3;
933
+ role: Role3;
890
934
  tool_call_id: ToolCallId1;
891
935
  function: Function1;
892
936
  error: ToolCallError | null;
893
937
  }
894
938
  export interface ToolCallError {
895
- type: Type7;
939
+ type: Type9;
896
940
  message: Message1;
897
941
  }
898
942
  /**
@@ -966,7 +1010,7 @@ export interface Sample {
966
1010
  input: Input1;
967
1011
  choices: Choices2;
968
1012
  target: Target1;
969
- id: Id2;
1013
+ id: Id6;
970
1014
  metadata: Metadata8;
971
1015
  sandbox: SandboxEnvironmentSpec | null;
972
1016
  files: Files1;
@@ -980,7 +1024,7 @@ export interface SampleLimitEvent {
980
1024
  working_start: WorkingStart1;
981
1025
  pending: Pending1;
982
1026
  event: Event1;
983
- type: Type8;
1027
+ type: Type10;
984
1028
  message: Message2;
985
1029
  limit: Limit1;
986
1030
  }
@@ -1082,41 +1126,24 @@ export interface ModelEvent {
1082
1126
  * ```
1083
1127
  */
1084
1128
  export interface ToolInfo {
1085
- name: Name7;
1086
- description: Description;
1129
+ name: Name8;
1130
+ description: Description2;
1087
1131
  parameters: ToolParams;
1088
1132
  }
1089
1133
  /**
1090
1134
  * Description of tool parameters object in JSON Schema format.
1091
1135
  */
1092
1136
  export interface ToolParams {
1093
- type: Type9;
1094
- properties: Properties;
1137
+ type: Type11;
1138
+ properties: Properties1;
1095
1139
  required: Required1;
1096
1140
  additionalProperties: Additionalproperties1;
1097
1141
  }
1098
- export interface Properties {
1099
- [k: string]: ToolParam;
1100
- }
1101
- /**
1102
- * Description of tool parameter in JSON Schema format.
1103
- */
1104
- export interface ToolParam {
1105
- type: Type10;
1106
- description: Description1;
1107
- default: Default;
1108
- enum: Enum;
1109
- items: ToolParam | null;
1110
- properties: Properties1;
1111
- additionalProperties: Additionalproperties;
1112
- anyOf: Anyof;
1113
- required: Required;
1114
- }
1115
- export interface Default {
1116
- [k: string]: unknown;
1142
+ export interface Properties1 {
1143
+ [k: string]: JSONSchema;
1117
1144
  }
1118
1145
  export interface ToolFunction {
1119
- name: Name8;
1146
+ name: Name9;
1120
1147
  }
1121
1148
  /**
1122
1149
  * Model generation options.
@@ -1146,6 +1173,7 @@ export interface GenerateConfig1 {
1146
1173
  reasoning_effort: ReasoningEffort;
1147
1174
  reasoning_tokens: ReasoningTokens;
1148
1175
  reasoning_history: ReasoningHistory;
1176
+ response_schema: ResponseSchema | null;
1149
1177
  }
1150
1178
  /**
1151
1179
  * Model call (raw request/response data).
@@ -1169,8 +1197,8 @@ export interface ToolEvent {
1169
1197
  working_start: WorkingStart6;
1170
1198
  pending: Pending6;
1171
1199
  event: Event6;
1172
- type: Type11;
1173
- id: Id3;
1200
+ type: Type12;
1201
+ id: Id7;
1174
1202
  function: Function2;
1175
1203
  arguments: Arguments1;
1176
1204
  view: ToolCallContent | null;
@@ -1260,7 +1288,7 @@ export interface LoggerEvent {
1260
1288
  * Message written to Python log.
1261
1289
  */
1262
1290
  export interface LoggingMessage {
1263
- name: Name9;
1291
+ name: Name10;
1264
1292
  level: Level;
1265
1293
  message: Message4;
1266
1294
  created: Created1;
@@ -1288,8 +1316,8 @@ export interface StepEvent {
1288
1316
  pending: Pending13;
1289
1317
  event: Event13;
1290
1318
  action: Action1;
1291
- type: Type12;
1292
- name: Name10;
1319
+ type: Type13;
1320
+ name: Name11;
1293
1321
  }
1294
1322
  /**
1295
1323
  * Subtask spawned.
@@ -1299,8 +1327,8 @@ export interface SubtaskEvent {
1299
1327
  working_start: WorkingStart14;
1300
1328
  pending: Pending14;
1301
1329
  event: Event14;
1302
- name: Name11;
1303
- type: Type13;
1330
+ name: Name12;
1331
+ type: Type14;
1304
1332
  input: Input5;
1305
1333
  result: Result2;
1306
1334
  events: Events2;
@@ -1321,7 +1349,7 @@ export interface Attachments {
1321
1349
  * Limit encontered by sample.
1322
1350
  */
1323
1351
  export interface EvalSampleLimit {
1324
- type: Type14;
1352
+ type: Type15;
1325
1353
  limit: Limit2;
1326
1354
  }
1327
1355
  /**
@@ -1,9 +1,8 @@
1
- import asyncio
2
1
  import uuid
3
- from asyncio import Future
4
2
  from contextvars import ContextVar
5
- from typing import Callable, Literal, NamedTuple, cast
3
+ from typing import Callable, Literal, NamedTuple
6
4
 
5
+ from inspect_ai._util.future import Future
7
6
  from inspect_ai.solver._task_state import TaskState
8
7
  from inspect_ai.tool._tool_call import ToolCall, ToolCallView
9
8
 
@@ -37,7 +36,6 @@ class HumanApprovalManager:
37
36
  from inspect_ai.log._samples import sample_active
38
37
 
39
38
  id = str(uuid.uuid4())
40
- future = cast(Future[Approval], asyncio.get_event_loop().create_future())
41
39
  sample = sample_active()
42
40
  assert sample
43
41
  assert sample.sample.id is not None
@@ -48,7 +46,7 @@ class HumanApprovalManager:
48
46
  id=sample.sample.id,
49
47
  epoch=sample.epoch,
50
48
  )
51
- self._approval_requests[id] = (pending, future)
49
+ self._approval_requests[id] = (pending, Future[Approval]())
52
50
  self._notify_change("add")
53
51
  return id
54
52
 
@@ -58,7 +56,7 @@ class HumanApprovalManager:
58
56
 
59
57
  async def wait_for_approval(self, id: str) -> Approval:
60
58
  _, future = self._approval_requests[id]
61
- return await future
59
+ return await future.result()
62
60
 
63
61
  def on_change(
64
62
  self, callback: Callable[[Literal["add", "remove"]], None]
@@ -77,16 +75,14 @@ class HumanApprovalManager:
77
75
  def complete_approval(self, id: str, result: Approval) -> None:
78
76
  if id in self._approval_requests:
79
77
  _, future = self._approval_requests[id]
80
- if not future.done():
81
- future.set_result(result)
78
+ future.set_result(result)
82
79
  del self._approval_requests[id]
83
80
  self._notify_change("remove")
84
81
 
85
82
  def fail_approval(self, id: str, error: Exception) -> None:
86
83
  if id in self._approval_requests:
87
84
  _, future = self._approval_requests[id]
88
- if not future.done():
89
- future.set_exception(error)
85
+ future.set_exception(error)
90
86
  del self._approval_requests[id]
91
87
  self._notify_change("remove")
92
88
 
@@ -1,6 +1,6 @@
1
- from asyncio import CancelledError
2
1
  from typing import Callable, Literal
3
2
 
3
+ import anyio
4
4
  from rich.console import RenderableType
5
5
  from rich.text import Text
6
6
  from textual.app import ComposeResult
@@ -44,7 +44,7 @@ async def panel_approval(
44
44
  )
45
45
  try:
46
46
  return await approvals.wait_for_approval(id)
47
- except CancelledError:
47
+ except anyio.get_cancelled_exc_class():
48
48
  approvals.withdraw_request(id)
49
49
  raise
50
50
 
@@ -63,12 +63,13 @@ def message_with_resolved_content(
63
63
  message: ChatMessage, resolver: Callable[[str], str]
64
64
  ) -> ChatMessage:
65
65
  if isinstance(message, ChatMessageUser) and not isinstance(message.content, str):
66
- return ChatMessageUser(
67
- content=[
68
- chat_content_with_resolved_content(content, resolver)
69
- for content in message.content
70
- ],
71
- source=message.source,
66
+ return message.model_copy(
67
+ update=dict(
68
+ content=[
69
+ chat_content_with_resolved_content(content, resolver)
70
+ for content in message.content
71
+ ],
72
+ )
72
73
  )
73
74
  else:
74
75
  return message
@@ -7,9 +7,11 @@ from ._file import (
7
7
  EvalLogInfo,
8
8
  list_eval_logs,
9
9
  read_eval_log,
10
+ read_eval_log_async,
10
11
  read_eval_log_sample,
11
12
  read_eval_log_samples,
12
13
  write_eval_log,
14
+ write_eval_log_async,
13
15
  write_log_dir_manifest,
14
16
  )
15
17
  from ._log import (
@@ -93,11 +95,13 @@ __all__ = [
93
95
  "convert_eval_logs",
94
96
  "list_eval_logs",
95
97
  "read_eval_log",
98
+ "read_eval_log_async",
96
99
  "read_eval_log_sample",
97
100
  "read_eval_log_samples",
98
101
  "condense_sample",
99
102
  "resolve_sample_attachments",
100
103
  "write_eval_log",
104
+ "write_eval_log_async",
101
105
  "write_log_dir_manifest",
102
106
  "retryable_eval_logs",
103
107
  "bundle_log_dir",