inspect-ai 0.3.87__py3-none-any.whl → 0.3.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_view/www/log-schema.json +18 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/types/log.d.ts +126 -123
- inspect_ai/approval/_apply.py +0 -2
- inspect_ai/model/_call_tools.py +11 -1
- inspect_ai/model/_providers/anthropic.py +6 -3
- inspect_ai/tool/_tool_call.py +3 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/RECORD +13 -13
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.87.dist-info → inspect_ai-0.3.88.dist-info}/top_level.txt +0 -0
@@ -2250,6 +2250,9 @@
|
|
2250
2250
|
"title": "Model",
|
2251
2251
|
"type": "string"
|
2252
2252
|
},
|
2253
|
+
"model_generate_config": {
|
2254
|
+
"$ref": "#/$defs/GenerateConfig"
|
2255
|
+
},
|
2253
2256
|
"model_base_url": {
|
2254
2257
|
"anyOf": [
|
2255
2258
|
{
|
@@ -2355,6 +2358,7 @@
|
|
2355
2358
|
"dataset",
|
2356
2359
|
"sandbox",
|
2357
2360
|
"model",
|
2361
|
+
"model_generate_config",
|
2358
2362
|
"model_base_url",
|
2359
2363
|
"model_args",
|
2360
2364
|
"config",
|
@@ -4595,6 +4599,18 @@
|
|
4595
4599
|
}
|
4596
4600
|
],
|
4597
4601
|
"default": null
|
4602
|
+
},
|
4603
|
+
"type": {
|
4604
|
+
"anyOf": [
|
4605
|
+
{
|
4606
|
+
"type": "string"
|
4607
|
+
},
|
4608
|
+
{
|
4609
|
+
"type": "null"
|
4610
|
+
}
|
4611
|
+
],
|
4612
|
+
"default": null,
|
4613
|
+
"title": "Type"
|
4598
4614
|
}
|
4599
4615
|
},
|
4600
4616
|
"required": [
|
@@ -4603,7 +4619,8 @@
|
|
4603
4619
|
"arguments",
|
4604
4620
|
"internal",
|
4605
4621
|
"parse_error",
|
4606
|
-
"view"
|
4622
|
+
"view",
|
4623
|
+
"type"
|
4607
4624
|
],
|
4608
4625
|
"title": "ToolCall",
|
4609
4626
|
"type": "object",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { ApplicationIcons } from "../../appearance/icons";
|
3
|
-
import { SampleLimitEvent,
|
3
|
+
import { SampleLimitEvent, Type10 } from "../../types/log";
|
4
4
|
import { EventPanel } from "./event/EventPanel";
|
5
5
|
|
6
6
|
interface SampleLimitEventViewProps {
|
@@ -17,7 +17,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
17
17
|
event,
|
18
18
|
className,
|
19
19
|
}) => {
|
20
|
-
const resolve_title = (type:
|
20
|
+
const resolve_title = (type: Type10) => {
|
21
21
|
switch (type) {
|
22
22
|
case "custom":
|
23
23
|
return "Custom Limit Exceeded";
|
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
34
34
|
}
|
35
35
|
};
|
36
36
|
|
37
|
-
const resolve_icon = (type:
|
37
|
+
const resolve_icon = (type: Type10) => {
|
38
38
|
switch (type) {
|
39
39
|
case "custom":
|
40
40
|
return ApplicationIcons.limits.custom;
|
@@ -24,12 +24,52 @@ export type SampleIds = (number | string)[] | null;
|
|
24
24
|
export type Shuffled = boolean | null;
|
25
25
|
export type Type = string;
|
26
26
|
export type Model = string;
|
27
|
+
export type MaxRetries = number | null;
|
28
|
+
export type Timeout = number | null;
|
29
|
+
export type MaxConnections = number | null;
|
30
|
+
export type SystemMessage = string | null;
|
31
|
+
export type MaxTokens = number | null;
|
32
|
+
export type TopP = number | null;
|
33
|
+
export type Temperature = number | null;
|
34
|
+
export type StopSeqs = string[] | null;
|
35
|
+
export type BestOf = number | null;
|
36
|
+
export type FrequencyPenalty = number | null;
|
37
|
+
export type PresencePenalty = number | null;
|
38
|
+
export type LogitBias = {
|
39
|
+
[k: string]: number;
|
40
|
+
} | null;
|
41
|
+
export type Seed = number | null;
|
42
|
+
export type TopK = number | null;
|
43
|
+
export type NumChoices = number | null;
|
44
|
+
export type Logprobs = boolean | null;
|
45
|
+
export type TopLogprobs = number | null;
|
46
|
+
export type ParallelToolCalls = boolean | null;
|
47
|
+
export type InternalTools = boolean | null;
|
48
|
+
export type MaxToolOutput = number | null;
|
49
|
+
export type CachePrompt = "auto" | boolean | null;
|
50
|
+
export type ReasoningEffort = ("low" | "medium" | "high") | null;
|
51
|
+
export type ReasoningTokens = number | null;
|
52
|
+
export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
|
53
|
+
export type Name1 = string;
|
54
|
+
export type Type1 =
|
55
|
+
| ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
|
56
|
+
| null;
|
57
|
+
export type Description = string | null;
|
58
|
+
export type Enum = unknown[] | null;
|
59
|
+
export type Properties = {
|
60
|
+
[k: string]: JSONSchema;
|
61
|
+
} | null;
|
62
|
+
export type Additionalproperties = JSONSchema | boolean | null;
|
63
|
+
export type Anyof = JSONSchema[] | null;
|
64
|
+
export type Required = string[] | null;
|
65
|
+
export type Description1 = string | null;
|
66
|
+
export type Strict = boolean | null;
|
27
67
|
export type ModelBaseUrl = string | null;
|
28
68
|
export type Limit = number | [unknown, unknown] | null;
|
29
69
|
export type SampleId = string | number | (string | number)[] | null;
|
30
70
|
export type Epochs = number | null;
|
31
71
|
export type EpochsReducer = string[] | null;
|
32
|
-
export type
|
72
|
+
export type Name2 = string;
|
33
73
|
export type Tools = string | string[];
|
34
74
|
export type Approvers = ApproverPolicyConfig[];
|
35
75
|
export type FailOnError = boolean | number | null;
|
@@ -47,12 +87,12 @@ export type LogImages = boolean | null;
|
|
47
87
|
export type LogBuffer = number | null;
|
48
88
|
export type LogShared = number | null;
|
49
89
|
export type ScoreDisplay = boolean | null;
|
50
|
-
export type
|
90
|
+
export type Type2 = "git";
|
51
91
|
export type Origin = string;
|
52
92
|
export type Commit = string;
|
53
93
|
export type Metadata = {} | null;
|
54
94
|
export type Scorers = EvalScorer[] | null;
|
55
|
-
export type
|
95
|
+
export type Name3 = string;
|
56
96
|
export type Options = {} | null;
|
57
97
|
export type Metrics =
|
58
98
|
| (
|
@@ -65,7 +105,7 @@ export type Metrics =
|
|
65
105
|
[k: string]: EvalMetricDefinition[];
|
66
106
|
}
|
67
107
|
| null;
|
68
|
-
export type
|
108
|
+
export type Name4 = string;
|
69
109
|
export type Options1 = {} | null;
|
70
110
|
export type Metadata1 = {} | null;
|
71
111
|
export type Metrics1 =
|
@@ -74,49 +114,9 @@ export type Metrics1 =
|
|
74
114
|
[k: string]: EvalMetricDefinition[];
|
75
115
|
}
|
76
116
|
| null;
|
77
|
-
export type
|
117
|
+
export type Name5 = string;
|
78
118
|
export type Solver1 = string;
|
79
119
|
export type Steps = EvalPlanStep[];
|
80
|
-
export type MaxRetries = number | null;
|
81
|
-
export type Timeout = number | null;
|
82
|
-
export type MaxConnections = number | null;
|
83
|
-
export type SystemMessage = string | null;
|
84
|
-
export type MaxTokens = number | null;
|
85
|
-
export type TopP = number | null;
|
86
|
-
export type Temperature = number | null;
|
87
|
-
export type StopSeqs = string[] | null;
|
88
|
-
export type BestOf = number | null;
|
89
|
-
export type FrequencyPenalty = number | null;
|
90
|
-
export type PresencePenalty = number | null;
|
91
|
-
export type LogitBias = {
|
92
|
-
[k: string]: number;
|
93
|
-
} | null;
|
94
|
-
export type Seed = number | null;
|
95
|
-
export type TopK = number | null;
|
96
|
-
export type NumChoices = number | null;
|
97
|
-
export type Logprobs = boolean | null;
|
98
|
-
export type TopLogprobs = number | null;
|
99
|
-
export type ParallelToolCalls = boolean | null;
|
100
|
-
export type InternalTools = boolean | null;
|
101
|
-
export type MaxToolOutput = number | null;
|
102
|
-
export type CachePrompt = "auto" | boolean | null;
|
103
|
-
export type ReasoningEffort = ("low" | "medium" | "high") | null;
|
104
|
-
export type ReasoningTokens = number | null;
|
105
|
-
export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
|
106
|
-
export type Name5 = string;
|
107
|
-
export type Type2 =
|
108
|
-
| ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
|
109
|
-
| null;
|
110
|
-
export type Description = string | null;
|
111
|
-
export type Enum = unknown[] | null;
|
112
|
-
export type Properties = {
|
113
|
-
[k: string]: JSONSchema;
|
114
|
-
} | null;
|
115
|
-
export type Additionalproperties = JSONSchema | boolean | null;
|
116
|
-
export type Anyof = JSONSchema[] | null;
|
117
|
-
export type Required = string[] | null;
|
118
|
-
export type Description1 = string | null;
|
119
|
-
export type Strict = boolean | null;
|
120
120
|
export type TotalSamples = number;
|
121
121
|
export type CompletedSamples = number;
|
122
122
|
export type Name6 = string;
|
@@ -210,6 +210,7 @@ export type ParseError = string | null;
|
|
210
210
|
export type Title = string | null;
|
211
211
|
export type Format2 = "text" | "markdown";
|
212
212
|
export type Content3 = string;
|
213
|
+
export type Type8 = string | null;
|
213
214
|
export type Model1 = string | null;
|
214
215
|
export type Id5 = string | null;
|
215
216
|
export type Content4 =
|
@@ -225,7 +226,7 @@ export type Source3 = ("input" | "generate") | null;
|
|
225
226
|
export type Role3 = "tool";
|
226
227
|
export type ToolCallId1 = string | null;
|
227
228
|
export type Function1 = string | null;
|
228
|
-
export type
|
229
|
+
export type Type9 =
|
229
230
|
| "parsing"
|
230
231
|
| "timeout"
|
231
232
|
| "unicode_decode"
|
@@ -305,7 +306,7 @@ export type Timestamp1 = string;
|
|
305
306
|
export type WorkingStart1 = number;
|
306
307
|
export type Pending1 = boolean | null;
|
307
308
|
export type Event1 = "sample_limit";
|
308
|
-
export type
|
309
|
+
export type Type10 =
|
309
310
|
| "message"
|
310
311
|
| "time"
|
311
312
|
| "working"
|
@@ -354,7 +355,7 @@ export type Input3 = (
|
|
354
355
|
)[];
|
355
356
|
export type Name8 = string;
|
356
357
|
export type Description2 = string;
|
357
|
-
export type
|
358
|
+
export type Type11 = "object";
|
358
359
|
export type Required1 = string[];
|
359
360
|
export type Additionalproperties1 = boolean;
|
360
361
|
export type Tools1 = ToolInfo[];
|
@@ -369,7 +370,7 @@ export type Timestamp6 = string;
|
|
369
370
|
export type WorkingStart6 = number;
|
370
371
|
export type Pending6 = boolean | null;
|
371
372
|
export type Event6 = "tool";
|
372
|
-
export type
|
373
|
+
export type Type12 = "function";
|
373
374
|
export type Id7 = string;
|
374
375
|
export type Function2 = string;
|
375
376
|
export type Result1 =
|
@@ -447,14 +448,14 @@ export type WorkingStart13 = number;
|
|
447
448
|
export type Pending13 = boolean | null;
|
448
449
|
export type Event13 = "step";
|
449
450
|
export type Action1 = "begin" | "end";
|
450
|
-
export type
|
451
|
+
export type Type13 = string | null;
|
451
452
|
export type Name11 = string;
|
452
453
|
export type Timestamp14 = string;
|
453
454
|
export type WorkingStart14 = number;
|
454
455
|
export type Pending14 = boolean | null;
|
455
456
|
export type Event14 = "subtask";
|
456
457
|
export type Name12 = string;
|
457
|
-
export type
|
458
|
+
export type Type14 = string | null;
|
458
459
|
export type Events2 = (
|
459
460
|
| SampleInitEvent
|
460
461
|
| SampleLimitEvent
|
@@ -515,7 +516,7 @@ export type Events = (
|
|
515
516
|
export type TotalTime = number | null;
|
516
517
|
export type WorkingTime3 = number | null;
|
517
518
|
export type Uuid = string | null;
|
518
|
-
export type
|
519
|
+
export type Type15 =
|
519
520
|
| "context"
|
520
521
|
| "time"
|
521
522
|
| "working"
|
@@ -576,6 +577,7 @@ export interface EvalSpec {
|
|
576
577
|
dataset: EvalDataset;
|
577
578
|
sandbox: SandboxEnvironmentSpec | null;
|
578
579
|
model: Model;
|
580
|
+
model_generate_config: GenerateConfig;
|
579
581
|
model_base_url: ModelBaseUrl;
|
580
582
|
model_args: ModelArgs;
|
581
583
|
config: EvalConfig;
|
@@ -607,6 +609,62 @@ export interface SandboxEnvironmentSpec {
|
|
607
609
|
export interface Config {
|
608
610
|
[k: string]: unknown;
|
609
611
|
}
|
612
|
+
/**
|
613
|
+
* Model generation options.
|
614
|
+
*/
|
615
|
+
export interface GenerateConfig {
|
616
|
+
max_retries: MaxRetries;
|
617
|
+
timeout: Timeout;
|
618
|
+
max_connections: MaxConnections;
|
619
|
+
system_message: SystemMessage;
|
620
|
+
max_tokens: MaxTokens;
|
621
|
+
top_p: TopP;
|
622
|
+
temperature: Temperature;
|
623
|
+
stop_seqs: StopSeqs;
|
624
|
+
best_of: BestOf;
|
625
|
+
frequency_penalty: FrequencyPenalty;
|
626
|
+
presence_penalty: PresencePenalty;
|
627
|
+
logit_bias: LogitBias;
|
628
|
+
seed: Seed;
|
629
|
+
top_k: TopK;
|
630
|
+
num_choices: NumChoices;
|
631
|
+
logprobs: Logprobs;
|
632
|
+
top_logprobs: TopLogprobs;
|
633
|
+
parallel_tool_calls: ParallelToolCalls;
|
634
|
+
internal_tools: InternalTools;
|
635
|
+
max_tool_output: MaxToolOutput;
|
636
|
+
cache_prompt: CachePrompt;
|
637
|
+
reasoning_effort: ReasoningEffort;
|
638
|
+
reasoning_tokens: ReasoningTokens;
|
639
|
+
reasoning_history: ReasoningHistory;
|
640
|
+
response_schema: ResponseSchema | null;
|
641
|
+
}
|
642
|
+
/**
|
643
|
+
* Schema for model response when using Structured Output.
|
644
|
+
*/
|
645
|
+
export interface ResponseSchema {
|
646
|
+
name: Name1;
|
647
|
+
json_schema: JSONSchema;
|
648
|
+
description: Description1;
|
649
|
+
strict: Strict;
|
650
|
+
}
|
651
|
+
/**
|
652
|
+
* JSON Schema for type.
|
653
|
+
*/
|
654
|
+
export interface JSONSchema {
|
655
|
+
type: Type1;
|
656
|
+
description: Description;
|
657
|
+
default: Default;
|
658
|
+
enum: Enum;
|
659
|
+
items: JSONSchema | null;
|
660
|
+
properties: Properties;
|
661
|
+
additionalProperties: Additionalproperties;
|
662
|
+
anyOf: Anyof;
|
663
|
+
required: Required;
|
664
|
+
}
|
665
|
+
export interface Default {
|
666
|
+
[k: string]: unknown;
|
667
|
+
}
|
610
668
|
export interface ModelArgs {}
|
611
669
|
/**
|
612
670
|
* Configuration used for evaluation.
|
@@ -653,7 +711,7 @@ export interface ApprovalPolicyConfig {
|
|
653
711
|
* ```
|
654
712
|
*/
|
655
713
|
export interface ApproverPolicyConfig {
|
656
|
-
name:
|
714
|
+
name: Name2;
|
657
715
|
tools: Tools;
|
658
716
|
params: Params;
|
659
717
|
}
|
@@ -662,7 +720,7 @@ export interface Params {}
|
|
662
720
|
* Git revision for evaluation.
|
663
721
|
*/
|
664
722
|
export interface EvalRevision {
|
665
|
-
type:
|
723
|
+
type: Type2;
|
666
724
|
origin: Origin;
|
667
725
|
commit: Commit;
|
668
726
|
}
|
@@ -670,23 +728,23 @@ export interface Packages {
|
|
670
728
|
[k: string]: string;
|
671
729
|
}
|
672
730
|
export interface EvalScorer {
|
673
|
-
name:
|
731
|
+
name: Name3;
|
674
732
|
options: Options;
|
675
733
|
metrics: Metrics;
|
676
734
|
metadata: Metadata1;
|
677
735
|
}
|
678
736
|
export interface EvalMetricDefinition {
|
679
|
-
name:
|
737
|
+
name: Name4;
|
680
738
|
options: Options1;
|
681
739
|
}
|
682
740
|
/**
|
683
741
|
* Plan (solvers) used in evaluation.
|
684
742
|
*/
|
685
743
|
export interface EvalPlan {
|
686
|
-
name:
|
744
|
+
name: Name5;
|
687
745
|
steps: Steps;
|
688
746
|
finish: EvalPlanStep | null;
|
689
|
-
config:
|
747
|
+
config: GenerateConfig1;
|
690
748
|
}
|
691
749
|
/**
|
692
750
|
* Solver step.
|
@@ -699,7 +757,7 @@ export interface Params1 {}
|
|
699
757
|
/**
|
700
758
|
* Model generation options.
|
701
759
|
*/
|
702
|
-
export interface
|
760
|
+
export interface GenerateConfig1 {
|
703
761
|
max_retries: MaxRetries;
|
704
762
|
timeout: Timeout;
|
705
763
|
max_connections: MaxConnections;
|
@@ -726,32 +784,6 @@ export interface GenerateConfig {
|
|
726
784
|
reasoning_history: ReasoningHistory;
|
727
785
|
response_schema: ResponseSchema | null;
|
728
786
|
}
|
729
|
-
/**
|
730
|
-
* Schema for model response when using Structured Output.
|
731
|
-
*/
|
732
|
-
export interface ResponseSchema {
|
733
|
-
name: Name5;
|
734
|
-
json_schema: JSONSchema;
|
735
|
-
description: Description1;
|
736
|
-
strict: Strict;
|
737
|
-
}
|
738
|
-
/**
|
739
|
-
* JSON Schema for type.
|
740
|
-
*/
|
741
|
-
export interface JSONSchema {
|
742
|
-
type: Type2;
|
743
|
-
description: Description;
|
744
|
-
default: Default;
|
745
|
-
enum: Enum;
|
746
|
-
items: JSONSchema | null;
|
747
|
-
properties: Properties;
|
748
|
-
additionalProperties: Additionalproperties;
|
749
|
-
anyOf: Anyof;
|
750
|
-
required: Required;
|
751
|
-
}
|
752
|
-
export interface Default {
|
753
|
-
[k: string]: unknown;
|
754
|
-
}
|
755
787
|
/**
|
756
788
|
* Scoring results from evaluation.
|
757
789
|
*/
|
@@ -925,6 +957,7 @@ export interface ToolCall {
|
|
925
957
|
internal: unknown;
|
926
958
|
parse_error: ParseError;
|
927
959
|
view: ToolCallContent | null;
|
960
|
+
type: Type8;
|
928
961
|
}
|
929
962
|
export interface Arguments {}
|
930
963
|
/**
|
@@ -949,7 +982,7 @@ export interface ChatMessageTool {
|
|
949
982
|
error: ToolCallError | null;
|
950
983
|
}
|
951
984
|
export interface ToolCallError {
|
952
|
-
type:
|
985
|
+
type: Type9;
|
953
986
|
message: Message1;
|
954
987
|
}
|
955
988
|
/**
|
@@ -1037,7 +1070,7 @@ export interface SampleLimitEvent {
|
|
1037
1070
|
working_start: WorkingStart1;
|
1038
1071
|
pending: Pending1;
|
1039
1072
|
event: Event1;
|
1040
|
-
type:
|
1073
|
+
type: Type10;
|
1041
1074
|
message: Message2;
|
1042
1075
|
limit: Limit1;
|
1043
1076
|
}
|
@@ -1104,7 +1137,7 @@ export interface ModelEvent {
|
|
1104
1137
|
input: Input3;
|
1105
1138
|
tools: Tools1;
|
1106
1139
|
tool_choice: ToolChoice;
|
1107
|
-
config:
|
1140
|
+
config: GenerateConfig;
|
1108
1141
|
output: ModelOutput;
|
1109
1142
|
error: Error1;
|
1110
1143
|
cache: Cache;
|
@@ -1147,7 +1180,7 @@ export interface ToolInfo {
|
|
1147
1180
|
* Description of tool parameters object in JSON Schema format.
|
1148
1181
|
*/
|
1149
1182
|
export interface ToolParams {
|
1150
|
-
type:
|
1183
|
+
type: Type11;
|
1151
1184
|
properties: Properties1;
|
1152
1185
|
required: Required1;
|
1153
1186
|
additionalProperties: Additionalproperties1;
|
@@ -1158,36 +1191,6 @@ export interface Properties1 {
|
|
1158
1191
|
export interface ToolFunction {
|
1159
1192
|
name: Name9;
|
1160
1193
|
}
|
1161
|
-
/**
|
1162
|
-
* Model generation options.
|
1163
|
-
*/
|
1164
|
-
export interface GenerateConfig1 {
|
1165
|
-
max_retries: MaxRetries;
|
1166
|
-
timeout: Timeout;
|
1167
|
-
max_connections: MaxConnections;
|
1168
|
-
system_message: SystemMessage;
|
1169
|
-
max_tokens: MaxTokens;
|
1170
|
-
top_p: TopP;
|
1171
|
-
temperature: Temperature;
|
1172
|
-
stop_seqs: StopSeqs;
|
1173
|
-
best_of: BestOf;
|
1174
|
-
frequency_penalty: FrequencyPenalty;
|
1175
|
-
presence_penalty: PresencePenalty;
|
1176
|
-
logit_bias: LogitBias;
|
1177
|
-
seed: Seed;
|
1178
|
-
top_k: TopK;
|
1179
|
-
num_choices: NumChoices;
|
1180
|
-
logprobs: Logprobs;
|
1181
|
-
top_logprobs: TopLogprobs;
|
1182
|
-
parallel_tool_calls: ParallelToolCalls;
|
1183
|
-
internal_tools: InternalTools;
|
1184
|
-
max_tool_output: MaxToolOutput;
|
1185
|
-
cache_prompt: CachePrompt;
|
1186
|
-
reasoning_effort: ReasoningEffort;
|
1187
|
-
reasoning_tokens: ReasoningTokens;
|
1188
|
-
reasoning_history: ReasoningHistory;
|
1189
|
-
response_schema: ResponseSchema | null;
|
1190
|
-
}
|
1191
1194
|
/**
|
1192
1195
|
* Model call (raw request/response data).
|
1193
1196
|
*/
|
@@ -1210,7 +1213,7 @@ export interface ToolEvent {
|
|
1210
1213
|
working_start: WorkingStart6;
|
1211
1214
|
pending: Pending6;
|
1212
1215
|
event: Event6;
|
1213
|
-
type:
|
1216
|
+
type: Type12;
|
1214
1217
|
id: Id7;
|
1215
1218
|
function: Function2;
|
1216
1219
|
arguments: Arguments1;
|
@@ -1332,7 +1335,7 @@ export interface StepEvent {
|
|
1332
1335
|
pending: Pending13;
|
1333
1336
|
event: Event13;
|
1334
1337
|
action: Action1;
|
1335
|
-
type:
|
1338
|
+
type: Type13;
|
1336
1339
|
name: Name11;
|
1337
1340
|
}
|
1338
1341
|
/**
|
@@ -1344,7 +1347,7 @@ export interface SubtaskEvent {
|
|
1344
1347
|
pending: Pending14;
|
1345
1348
|
event: Event14;
|
1346
1349
|
name: Name12;
|
1347
|
-
type:
|
1350
|
+
type: Type14;
|
1348
1351
|
input: Input5;
|
1349
1352
|
result: Result2;
|
1350
1353
|
events: Events2;
|
@@ -1365,7 +1368,7 @@ export interface Attachments {
|
|
1365
1368
|
* Limit encountered by sample.
|
1366
1369
|
*/
|
1367
1370
|
export interface EvalSampleLimit {
|
1368
|
-
type:
|
1371
|
+
type: Type15;
|
1369
1372
|
limit: Limit2;
|
1370
1373
|
}
|
1371
1374
|
/**
|
inspect_ai/approval/_apply.py
CHANGED
inspect_ai/model/_call_tools.py
CHANGED
@@ -350,7 +350,17 @@ async def call_tool(
|
|
350
350
|
|
351
351
|
approved, approval = await apply_tool_approval(message, call, tool_def.viewer)
|
352
352
|
if not approved:
|
353
|
-
|
353
|
+
if approval and approval.decision == "terminate":
|
354
|
+
from inspect_ai.solver._limit import SampleLimitExceededError
|
355
|
+
|
356
|
+
raise SampleLimitExceededError(
|
357
|
+
"operator",
|
358
|
+
value=1,
|
359
|
+
limit=1,
|
360
|
+
message="Tool call approver requested termination.",
|
361
|
+
)
|
362
|
+
else:
|
363
|
+
raise ToolApprovalError(approval.explanation if approval else None)
|
354
364
|
if approval and approval.modified:
|
355
365
|
call = approval.modified
|
356
366
|
|
@@ -368,7 +368,11 @@ class AnthropicAPI(ModelAPI):
|
|
368
368
|
content: str | None = None
|
369
369
|
stop_reason: StopReason | None = None
|
370
370
|
|
371
|
-
|
371
|
+
# NOTE: Using case-insensitive matching because the Anthropic Bedrock API sometimes capitalizes the word 'input' in its error message and other times doesn't.
|
372
|
+
if any(
|
373
|
+
message in error.lower()
|
374
|
+
for message in ["prompt is too long", "input is too long"]
|
375
|
+
):
|
372
376
|
if (
|
373
377
|
isinstance(ex.body, dict)
|
374
378
|
and "error" in ex.body.keys()
|
@@ -810,8 +814,7 @@ async def model_output_from_message(
|
|
810
814
|
message.usage.input_tokens
|
811
815
|
+ (input_tokens_cache_write or 0)
|
812
816
|
+ (input_tokens_cache_read or 0)
|
813
|
-
+ message.usage.output_tokens
|
814
|
-
+ reasoning_tokens
|
817
|
+
+ message.usage.output_tokens # includes reasoning tokens
|
815
818
|
)
|
816
819
|
return ModelOutput(
|
817
820
|
model=message.model,
|
inspect_ai/tool/_tool_call.py
CHANGED
@@ -134,7 +134,7 @@ inspect_ai/_view/www/eslint.config.mjs,sha256=VA0D5XmD02XpekiakCRC45O8QQQCmNB3zO
|
|
134
134
|
inspect_ai/_view/www/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9MAs,1508
|
135
135
|
inspect_ai/_view/www/index.html,sha256=wqZHIn_9TODavPHnGyY9F1RH6JBIphoaqRIRgBQgrUE,910
|
136
136
|
inspect_ai/_view/www/jsconfig.json,sha256=vt1gPPYezOFeV9nofA93CmVJAKGb1QeKGuyvEn1CXgk,383
|
137
|
-
inspect_ai/_view/www/log-schema.json,sha256=
|
137
|
+
inspect_ai/_view/www/log-schema.json,sha256=IgG7RswNo4JV-ODLb8M0JN8TtpfQXgUJ2Kc9GyfiZEM,115020
|
138
138
|
inspect_ai/_view/www/package.json,sha256=saCkYcrfygk9s9zXMZynqgREXl9a22jemF5vP8CLdq4,2465
|
139
139
|
inspect_ai/_view/www/postcss.config.cjs,sha256=mwpiwZD1alr_ECeLVf7vIpX_5KiARNF8HbkpWWiqSac,324
|
140
140
|
inspect_ai/_view/www/tsconfig.json,sha256=FbmQYpX8ta5Wyi8b8md2O_8CXkfQgr-Pe2yRyKXeqM0,619
|
@@ -325,7 +325,7 @@ inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css,sha256=J5F
|
|
325
325
|
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx,sha256=lAnzOYf3PZte1X_o6nvq2FBkTzPlsGq9aCOJK82Ofcg,5805
|
326
326
|
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css,sha256=kB4a3g7RveznFGsPmQajJRuNWRrpDyK2DgztvUMIYZQ,275
|
327
327
|
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx,sha256=SFIRa8ugPpnhzce8E8yRKSSxjiVwrxJMjTifuLptWjY,2853
|
328
|
-
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx,sha256=
|
328
|
+
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx,sha256=MFk7j32Rs0-CaD9feljXqRxE8dZkP0eLM0xRG8aujLQ,1618
|
329
329
|
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css,sha256=G-oKvjD6c7sTGu1Iemh_1MI2QQpUi7h5uGApW5lO5NU,385
|
330
330
|
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx,sha256=0gQEjTMnfMgcPj28V-ATzwL-gWWSZBjZxeYN8G2cMzQ,3799
|
331
331
|
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css,sha256=YWHWPM_-2UognvNIjB5-UejG17xy0yRW3jyfN4UeD1E,246
|
@@ -377,7 +377,7 @@ inspect_ai/_view/www/src/state/utils.ts,sha256=Lshq-OnLiooBkCHxqu8eTOEMAAPfdcixs
|
|
377
377
|
inspect_ai/_view/www/src/storage/index.ts,sha256=wtoQspke1wWOspvQLUFxEAi1Gd1jdHkfRfWH1B3RPkw,774
|
378
378
|
inspect_ai/_view/www/src/types/asciicinema-player.d.ts,sha256=PgM6swZ9P5pKXcdKfYfmd1dcZQDy105K60NvcQPFqVo,647
|
379
379
|
inspect_ai/_view/www/src/types/jsondiffpatch.d.ts,sha256=QXTAwln2Z1vDiNuoG4b-VWoH0hKMJHSM1L2deXEV6ZQ,188
|
380
|
-
inspect_ai/_view/www/src/types/log.d.ts,sha256=
|
380
|
+
inspect_ai/_view/www/src/types/log.d.ts,sha256=CZNzrWy32fzGendKuS9Kwb1Z7N7fYOwoQPDKJ3wmEOE,32832
|
381
381
|
inspect_ai/_view/www/src/types/markdown-it-katex.d.ts,sha256=kvZSFtTInD4akeCLVWlwjjqHdZqMpE3rXjXWQvMgcOw,555
|
382
382
|
inspect_ai/_view/www/src/types/prism.d.ts,sha256=g0uL_2XdnxuCVS_XX6iD9PHIF9PWix_vPXHOVz_4vII,342
|
383
383
|
inspect_ai/_view/www/src/usage/ModelTokenTable.tsx,sha256=UM9r2Zg7AUFmgXZlB-jL-McK6QLOSifGILhB8403J2I,623
|
@@ -477,7 +477,7 @@ inspect_ai/agent/_human/commands/score.py,sha256=6DyKiYHU7w-tKxHH5cZ0rXgFY7NWc4k
|
|
477
477
|
inspect_ai/agent/_human/commands/status.py,sha256=uUO5M4skWDp29OS8sqVKAqZw0OcM3MSesBYQNbRypJ0,1934
|
478
478
|
inspect_ai/agent/_human/commands/submit.py,sha256=D2p1M2ApvAcaVZhbP3fFofG9ZsPVvmxivSLIF5xQxtA,6524
|
479
479
|
inspect_ai/approval/__init__.py,sha256=Bqq4GFljOqKaIUkuCvhlFv89TfJpvbuO_R0jVyjb8VI,379
|
480
|
-
inspect_ai/approval/_apply.py,sha256=
|
480
|
+
inspect_ai/approval/_apply.py,sha256=WAB7mqWv3A-2atVoqZuMeq7jZW2cQiR7n2qX6tXyF6A,2169
|
481
481
|
inspect_ai/approval/_approval.py,sha256=twQcEvfU3-hPdsG785ak8OvRMOzMa00-UQAdz9Mh8Fo,863
|
482
482
|
inspect_ai/approval/_approver.py,sha256=0Sz6lUMWLEi9E5fQZ8sWD_K-GWAw7B66_vfVunKJkvY,844
|
483
483
|
inspect_ai/approval/_auto.py,sha256=ZBNR1D2nroQslpeyaOAGkfWQ1pN8AmMXn3zDn5fcw98,734
|
@@ -529,7 +529,7 @@ inspect_ai/log/_recorders/buffer/filestore.py,sha256=S6RP-5zkOPSmy1hV2LCCbfwdX-Y
|
|
529
529
|
inspect_ai/log/_recorders/buffer/types.py,sha256=pTnPCZHbk9qF6yF-eNXHTa23cLH_FvP8dmfPJCFO15Q,2046
|
530
530
|
inspect_ai/model/__init__.py,sha256=6Aa_HEU-rgxWPDaIRlE6KBdXY406x2LtcLeVtAxk-AI,2453
|
531
531
|
inspect_ai/model/_cache.py,sha256=Bl6WS9b1kJRVsGK0h7Fd1-mDAbrlxvNXMPK30P3aMuM,13736
|
532
|
-
inspect_ai/model/_call_tools.py,sha256=
|
532
|
+
inspect_ai/model/_call_tools.py,sha256=T5MGDojp-pHNPEwxE2BLD7dgAWIBYWz3bdJUQdAmhIw,28277
|
533
533
|
inspect_ai/model/_chat_message.py,sha256=Kz933i25M175O4SoYTvJMwt4ELTiFaohfaDgK0myFyw,7417
|
534
534
|
inspect_ai/model/_conversation.py,sha256=J4zxb8mJdcpV5zLEDYS-ikQckONeaUZrqNReLLruUOE,374
|
535
535
|
inspect_ai/model/_display.py,sha256=0wb9tV4PItvwgUpqpxLCL60oWlg4lT1nVA6GKJV3rcU,3090
|
@@ -543,7 +543,7 @@ inspect_ai/model/_openai_responses.py,sha256=bQWuVvJIkS8CqtoX9z1aRb1aky4TNbMngG2
|
|
543
543
|
inspect_ai/model/_reasoning.py,sha256=qmR8WT6t_cb7NIsJOQHPyFZh2eLV0HmYxKo2vtvteQ4,929
|
544
544
|
inspect_ai/model/_registry.py,sha256=Cr2y32EqLnOqLbSWoXHVK4ivTTzCUhJuACxoTyPt8kY,2032
|
545
545
|
inspect_ai/model/_render.py,sha256=rWypNUjgrH4NGp0r-ESAze9gZz7lYNjheEP438vRYZE,922
|
546
|
-
inspect_ai/model/_providers/anthropic.py,sha256=
|
546
|
+
inspect_ai/model/_providers/anthropic.py,sha256=2IhKsbntPTaW4MosabUv0kWlm2oYj3tomjJUKunKuu0,33647
|
547
547
|
inspect_ai/model/_providers/azureai.py,sha256=uXED_qmeyW1XAGBosbG7PJNk833RIeokKX3l_8O9gYA,14341
|
548
548
|
inspect_ai/model/_providers/bedrock.py,sha256=rh8BvSUPWiFMh0TQwMYTlucfFrDKswtLhzozulrz7wE,24004
|
549
549
|
inspect_ai/model/_providers/cloudflare.py,sha256=mWqBqc0zzf29UWz34biq8CxSu99a95YjpH_6A4na52g,4617
|
@@ -611,7 +611,7 @@ inspect_ai/solver/_use_tools.py,sha256=3Oprsk5FlG5paHLqAurNgj23-endW3_t3cxe28wkM
|
|
611
611
|
inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
|
612
612
|
inspect_ai/tool/__init__.py,sha256=M1xR4GLN4LBCzWWbMbIGXE-XnagdvC9mK5_a4qK35Ew,2466
|
613
613
|
inspect_ai/tool/_tool.py,sha256=VjnbMnsXPLpNqglc6Oss7OK7cVHHG7W2qzWtjcOc5Us,7055
|
614
|
-
inspect_ai/tool/_tool_call.py,sha256=
|
614
|
+
inspect_ai/tool/_tool_call.py,sha256=sd5-RbsOWz3oN4wGcJ_I4w-Ku4sgKJ-qmwirQQXJ18o,2754
|
615
615
|
inspect_ai/tool/_tool_choice.py,sha256=L8QNmcnSnilzKg2HU3G65W5aYaSngO09z4FQ0fQlATM,496
|
616
616
|
inspect_ai/tool/_tool_def.py,sha256=3Caxc8ywDfOLy5Lez82_tSNyopQ1BI-BnET5YePv1N0,8400
|
617
617
|
inspect_ai/tool/_tool_description.py,sha256=SZTQzehReNNKwQ0iUL6v4pPfEptgf3UOP4J888JV18M,524
|
@@ -692,9 +692,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
|
|
692
692
|
inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
|
693
693
|
inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
|
694
694
|
inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
|
695
|
-
inspect_ai-0.3.
|
696
|
-
inspect_ai-0.3.
|
697
|
-
inspect_ai-0.3.
|
698
|
-
inspect_ai-0.3.
|
699
|
-
inspect_ai-0.3.
|
700
|
-
inspect_ai-0.3.
|
695
|
+
inspect_ai-0.3.88.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
696
|
+
inspect_ai-0.3.88.dist-info/METADATA,sha256=NdRp3yaIApWvQ2Y0j9xCs8m6Fsloy_Aunk-YSOiIIf4,4965
|
697
|
+
inspect_ai-0.3.88.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
698
|
+
inspect_ai-0.3.88.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
699
|
+
inspect_ai-0.3.88.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
700
|
+
inspect_ai-0.3.88.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|