inspect-ai 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +13 -0
- inspect_ai/_cli/view.py +4 -0
- inspect_ai/_display/textual/widgets/transcript.py +15 -9
- inspect_ai/_eval/task/error.py +10 -14
- inspect_ai/_eval/task/run.py +10 -8
- inspect_ai/_util/transcript.py +11 -0
- inspect_ai/_view/www/dist/assets/index.css +1 -0
- inspect_ai/_view/www/dist/assets/index.js +100 -94
- inspect_ai/_view/www/log-schema.json +35 -19
- inspect_ai/_view/www/src/components/ChatView.mjs +23 -0
- inspect_ai/_view/www/src/types/log.d.ts +6 -4
- inspect_ai/log/_recorders/eval.py +1 -1
- inspect_ai/model/_chat_message.py +27 -0
- inspect_ai/model/_conversation.py +10 -3
- inspect_ai/model/_generate_config.py +6 -0
- inspect_ai/model/_model.py +74 -0
- inspect_ai/model/_openai.py +33 -1
- inspect_ai/model/_providers/anthropic.py +12 -0
- inspect_ai/model/_providers/groq.py +4 -0
- inspect_ai/model/_providers/openai.py +21 -9
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_reasoning.py +17 -0
- inspect_ai/solver/_basic_agent.py +19 -9
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +4 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/RECORD +34 -29
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/log-schema.json CHANGED
@@ -260,13 +260,26 @@
           ],
           "default": null,
           "title": "Tool Calls"
+        },
+        "reasoning": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Reasoning"
         }
       },
       "required": [
         "content",
         "source",
         "role",
-        "tool_calls"
+        "tool_calls",
+        "reasoning"
       ],
       "title": "ChatMessageAssistant",
       "type": "object",
@@ -486,7 +499,10 @@
         "tool_call_id": {
           "anyOf": [
             {
-              "type": "string"
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
             },
             {
               "type": "null"
@@ -1131,7 +1147,6 @@
       "presence_penalty": null,
       "logit_bias": null,
       "seed": null,
-      "suffix": null,
       "top_k": null,
       "num_choices": null,
       "logprobs": null,
@@ -1140,7 +1155,8 @@
       "internal_tools": null,
       "max_tool_output": null,
       "cache_prompt": null,
-      "reasoning_effort": null
+      "reasoning_effort": null,
+      "reasoning_history": null
     }
   }
 },
@@ -2120,18 +2136,6 @@
         "default": null,
         "title": "Seed"
       },
-      "suffix": {
-        "anyOf": [
-          {
-            "type": "string"
-          },
-          {
-            "type": "null"
-          }
-        ],
-        "default": null,
-        "title": "Suffix"
-      },
       "top_k": {
         "anyOf": [
           {
@@ -2248,6 +2252,18 @@
         ],
         "default": null,
         "title": "Reasoning Effort"
+      },
+      "reasoning_history": {
+        "anyOf": [
+          {
+            "type": "boolean"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": null,
+        "title": "Reasoning History"
       }
     },
     "title": "GenerateConfig",
@@ -2266,7 +2282,6 @@
       "presence_penalty",
       "logit_bias",
       "seed",
-      "suffix",
       "top_k",
       "num_choices",
       "logprobs",
@@ -2275,7 +2290,8 @@
       "internal_tools",
       "max_tool_output",
       "cache_prompt",
-      "reasoning_effort"
+      "reasoning_effort",
+      "reasoning_history"
     ],
     "additionalProperties": false
   },
@@ -4247,9 +4263,9 @@
   "parallel_tool_calls": null,
   "presence_penalty": null,
   "reasoning_effort": null,
+  "reasoning_history": null,
   "seed": null,
   "stop_seqs": null,
-  "suffix": null,
   "system_message": null,
   "temperature": null,
   "timeout": null,
inspect_ai/_view/www/src/components/ChatView.mjs CHANGED
@@ -8,6 +8,7 @@ import { ExpandablePanel } from "./ExpandablePanel.mjs";
 import { FontSize, TextStyle } from "../appearance/Fonts.mjs";
 import { resolveToolInput, ToolCallView } from "./Tools.mjs";
 import { VirtualList } from "./VirtualList.mjs";
+import { MarkdownDiv } from "./MarkdownDiv.mjs";
 
 /**
  * Renders the ChatViewVirtualList component.
@@ -282,7 +283,29 @@ const ChatMessage = ({
       <i class="${iconForMsg(message)}"></i>
       ${message.role}
     </div>
+
+    ${
+      message.role === "assistant" && message.reasoning
+        ? html` <div
+            style=${{
+              marginLeft: indented ? "1.1rem" : "0",
+              paddingBottom: "0.8rem",
+            }}
+          >
+            <div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>Reasoning</div>
+            <${ExpandablePanel} collapse=${true}><${MarkdownDiv} markdown=${message.reasoning}/></${ExpandablePanel}>
+          </div>`
+        : undefined
+    }
+
     <div style=${{ marginLeft: indented ? "1.1rem" : "0", paddingBottom: indented ? "0.8rem" : "0" }}>
+      ${
+        message.role === "assistant" && message.reasoning
+          ? html`<div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>
+              Response
+            </div>`
+          : ""
+      }
       <${ExpandablePanel} collapse=${collapse}>
         <${MessageContents}
           key=${`${id}-contents`}
inspect_ai/_view/www/src/types/log.d.ts CHANGED
@@ -70,7 +70,6 @@ export type LogitBias = {
   [k: string]: number;
 } | null;
 export type Seed = number | null;
-export type Suffix = string | null;
 export type TopK = number | null;
 export type NumChoices = number | null;
 export type Logprobs = boolean | null;
@@ -80,6 +79,7 @@ export type InternalTools = boolean | null;
 export type MaxToolOutput = number | null;
 export type CachePrompt = "auto" | boolean | null;
 export type ReasoningEffort = ("low" | "medium" | "high") | null;
+export type ReasoningHistory = boolean | null;
 export type TotalSamples = number;
 export type CompletedSamples = number;
 export type Name3 = string;
@@ -133,7 +133,7 @@ export type Content1 =
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source1 = ("input" | "generate") | null;
 export type Role1 = "user";
-export type ToolCallId = string | null;
+export type ToolCallId = string[] | null;
 export type Content2 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -147,6 +147,7 @@ export type ParseError = string | null;
 export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
+export type Reasoning = string | null;
 export type Content4 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -547,7 +548,6 @@ export interface GenerateConfig {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -557,6 +557,7 @@ export interface GenerateConfig {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 export interface EvalResults {
   total_samples: TotalSamples;
@@ -658,6 +659,7 @@ export interface ChatMessageAssistant {
   source: Source2;
   role: Role2;
   tool_calls: ToolCalls;
+  reasoning: Reasoning;
 }
 export interface ToolCall {
   id: Id1;
@@ -901,7 +903,6 @@ export interface GenerateConfig1 {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -911,6 +912,7 @@ export interface GenerateConfig1 {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 /**
  * Model call (raw request/response data).
inspect_ai/log/_recorders/eval.py CHANGED
@@ -203,7 +203,7 @@ class EvalRecorder(FileRecorder):
         # of small fetches from the zip file streams)
         temp_log: str | None = None
         fs = filesystem(location)
-        if not fs.is_local():
+        if not fs.is_local() and header_only is False:
            with tempfile.NamedTemporaryFile(delete=False) as temp:
                temp_log = temp.name
                fs.get_file(location, temp_log)
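The practical effect of this guard: when only the log header is requested, the recorder no longer downloads the entire remote `.eval` zip to a temp file first. A minimal sketch of how this surfaces through the public API (the S3 path here is illustrative):

```python
from inspect_ai.log import read_eval_log

# header_only=True reads just the header/summary of a remote log rather
# than fetching the full zip to a local temp file first
log = read_eval_log("s3://my-bucket/logs/example.eval", header_only=True)
print(log.status, log.eval.model)
```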
inspect_ai/model/_chat_message.py CHANGED
@@ -7,6 +7,8 @@ from inspect_ai._util.content import Content, ContentText
 from inspect_ai.tool import ToolCall
 from inspect_ai.tool._tool_call import ToolCallError
 
+from ._reasoning import parse_content_with_reasoning
+
 logger = getLogger(__name__)
 
 
@@ -83,6 +85,31 @@ class ChatMessageAssistant(ChatMessageBase):
     tool_calls: list[ToolCall] | None = Field(default=None)
     """Tool calls made by the model."""
 
+    reasoning: str | None = Field(default=None)
+    """Reasoning content."""
+
+    # Some OpenAI-compatible REST endpoints include reasoning as a field alongside
+    # content, however since this field doesn't exist in the OpenAI interface,
+    # hosting providers (so far we've seen this with Together and Groq) may
+    # include the reasoning in a <think></think> tag before the main response.
+    # We expect this pattern to be repeated elsewhere, so include this hook to
+    # automatically extract the reasoning content when the response is prefaced
+    # with a <think> block. If this ends up being an overreach we can fall back
+    # to each provider manually parsing out <think> using a helper function.
+    # The implementation isn't important here, the critical thing to establish
+    # is that Inspect makes reasoning content available separately.
+    @model_validator(mode="before")
+    @classmethod
+    def extract_reasoning(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            content = data.get("content", None)
+            if isinstance(content, str):
+                parsed = parse_content_with_reasoning(content)
+                if parsed:
+                    data["reasoning"] = parsed.reasoning
+                    data["content"] = parsed.content
+        return data
+
 
 class ChatMessageTool(ChatMessageBase):
     role: Literal["tool"] = Field(default="tool")
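The validator means a `<think>` prefix is split out at construction time, so downstream code can rely on the `reasoning` field. A minimal sketch of the resulting behavior:

```python
from inspect_ai.model import ChatMessageAssistant

# the mode="before" validator splits a leading <think> block into the
# separate reasoning field, leaving only the answer in content
msg = ChatMessageAssistant(
    content="<think>\nThe user wants a haiku.\n</think>\n\nAn old silent pond..."
)
print(msg.reasoning)  # The user wants a haiku.
print(msg.text)       # An old silent pond...
```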
inspect_ai/model/_conversation.py CHANGED
@@ -2,7 +2,7 @@ from rich.console import RenderableType
 from rich.text import Text
 
 from inspect_ai._util.rich import lines_display
-from inspect_ai._util.transcript import transcript_markdown
+from inspect_ai._util.transcript import transcript_markdown, transcript_reasoning
 from inspect_ai.util._conversation import conversation_panel
 from inspect_ai.util._display import display_type
 
@@ -38,8 +38,15 @@ def conversation_assistant_message(
         content=transcript_markdown(m.text, escape=True),
     )
 
-    # content
-    content: list[RenderableType] = (
+    # build content
+    content: list[RenderableType] = []
+
+    # reasoning
+    if message.reasoning:
+        content.extend(transcript_reasoning(message.reasoning))
+
+    # message text
+    content.extend(
         [transcript_markdown(message.text, escape=True)] if message.text else []
     )
 
inspect_ai/model/_generate_config.py CHANGED
@@ -75,6 +75,9 @@ class GenerateConfigArgs(TypedDict, total=False):
     reasoning_effort: Literal["low", "medium", "high"] | None
     """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
 
+    reasoning_history: bool | None
+    """Include reasoning in chat message history sent to generate."""
+
 
 class GenerateConfig(BaseModel):
     """Base class for model generation configs."""
@@ -145,6 +148,9 @@ class GenerateConfig(BaseModel):
     reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
     """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
 
+    reasoning_history: bool | None = Field(default=None)
+    """Include reasoning in chat message history sent to generate."""
+
     def merge(
         self, other: Union["GenerateConfig", GenerateConfigArgs]
     ) -> "GenerateConfig":
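A short sketch of how the new option is set (`None` is treated as "include" by `resolve_reasoning_history` below; the merge semantics shown are an assumption based on `GenerateConfig.merge`):

```python
from inspect_ai.model import GenerateConfig

# reasoning_history=False strips prior reasoning from the chat history
# sent back to the model on subsequent generate() calls
config = GenerateConfig(reasoning_history=False)

# merge(): non-None fields on the argument override this config's fields
merged = GenerateConfig(temperature=0.7).merge(config)
print(merged.reasoning_history)  # False
print(merged.temperature)        # 0.7
```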
inspect_ai/model/_model.py CHANGED
@@ -168,6 +168,10 @@ class ModelAPI(abc.ABC):
         """Tool results can contain images"""
         return False
 
+    def has_reasoning_history(self) -> bool:
+        """Chat message assistant messages can include reasoning."""
+        return False
+
 
 class Model:
     """Model interface."""
@@ -302,6 +306,11 @@ class Model:
             tools = []
             tool_choice = "none"
 
+        # handle reasoning history
+        input = resolve_reasoning_history(
+            input, config, self.api.has_reasoning_history()
+        )
+
         # apply any tool model_input handlers
         input = resolve_tool_model_input(tdefs, input)
 
|
|
726
735
|
return messages
|
727
736
|
|
728
737
|
|
738
|
+
def resolve_reasoning_history(
|
739
|
+
messages: list[ChatMessage], config: GenerateConfig, api_has_reasoning_history: bool
|
740
|
+
) -> list[ChatMessage]:
|
741
|
+
# determine if we are including reasoning history
|
742
|
+
reasoning_history = config.reasoning_history is not False
|
743
|
+
|
744
|
+
# determine up front if we have any reasoning content
|
745
|
+
have_reasoning = any(
|
746
|
+
[
|
747
|
+
isinstance(m, ChatMessageAssistant) and m.reasoning is not None
|
748
|
+
for m in messages
|
749
|
+
]
|
750
|
+
)
|
751
|
+
if not have_reasoning:
|
752
|
+
return messages
|
753
|
+
|
754
|
+
# API asssistant message format directly supports reasoning history so we will:
|
755
|
+
# (a) Remove reasoning content entirely if config says not to include it; or
|
756
|
+
# (b) Leave the messages alone if config says to include it
|
757
|
+
if api_has_reasoning_history:
|
758
|
+
# remove reasoning history as per config
|
759
|
+
if not reasoning_history:
|
760
|
+
resolved_messages: list[ChatMessage] = []
|
761
|
+
for message in messages:
|
762
|
+
if isinstance(message, ChatMessageAssistant):
|
763
|
+
resolved_messages.append(
|
764
|
+
message.model_copy(update={"reasoning": None})
|
765
|
+
)
|
766
|
+
else:
|
767
|
+
resolved_messages.append(message)
|
768
|
+
|
769
|
+
return resolved_messages
|
770
|
+
|
771
|
+
# include reasoning history as per config
|
772
|
+
else:
|
773
|
+
return messages
|
774
|
+
|
775
|
+
# API can't represent reasoning natively so include <think> tags
|
776
|
+
elif reasoning_history:
|
777
|
+
resolved_messages = []
|
778
|
+
for message in messages:
|
779
|
+
if (
|
780
|
+
isinstance(message, ChatMessageAssistant)
|
781
|
+
and message.reasoning is not None
|
782
|
+
):
|
783
|
+
message = deepcopy(message)
|
784
|
+
if isinstance(message.content, str):
|
785
|
+
message.content = (
|
786
|
+
f"<think>\n{message.reasoning}\n</think>\n\n{message.content}"
|
787
|
+
)
|
788
|
+
else:
|
789
|
+
message.content.insert(
|
790
|
+
0, ContentText(text=f"<think>\n{message.reasoning}\n</think>\n")
|
791
|
+
)
|
792
|
+
message.reasoning = None
|
793
|
+
|
794
|
+
resolved_messages.append(message)
|
795
|
+
|
796
|
+
return resolved_messages
|
797
|
+
|
798
|
+
# api doesn't handle reasoning and config says no reasoning_history, nothing to do
|
799
|
+
else:
|
800
|
+
return messages
|
801
|
+
|
802
|
+
|
729
803
|
def resolve_tool_model_input(
|
730
804
|
tdefs: list[ToolDef], messages: list[ChatMessage]
|
731
805
|
) -> list[ChatMessage]:
|
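For providers whose message format can't carry reasoning natively, the fallback branch re-inlines it as a `<think>` block. A rough illustration of the transformation (the function itself is internal, so this reproduces the string case by hand):

```python
from inspect_ai.model import ChatMessageAssistant

msg = ChatMessageAssistant(content="The answer is 4.", reasoning="2 + 2 = 4")

# what the fallback branch produces for string content
folded = f"<think>\n{msg.reasoning}\n</think>\n\n{msg.content}"
print(folded)
```

Note the round trip: `parse_content_with_reasoning` (see `_reasoning.py` below) recovers exactly this shape on the way back in.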
inspect_ai/model/_openai.py CHANGED
@@ -43,10 +43,18 @@ from ._chat_message import (
 from ._model_output import ModelUsage, StopReason, as_stop_reason
 
 
+def is_o_series(name: str) -> bool:
+    return is_o1(name) or is_o3(name)
+
+
 def is_o1(name: str) -> bool:
     return name.startswith("o1")
 
 
+def is_o3(name: str) -> bool:
+    return name.startswith("o3")
+
+
 def is_o1_full(name: str) -> bool:
     return is_o1(name) and not is_o1_mini(name) and not is_o1_preview(name)
 
@@ -55,10 +63,18 @@ def is_o1_mini(name: str) -> bool:
     return name.startswith("o1-mini")
 
 
+def is_o3_mini(name: str) -> bool:
+    return name.startswith("o3-mini")
+
+
 def is_o1_preview(name: str) -> bool:
     return name.startswith("o1-preview")
 
 
+def is_gpt(name: str) -> bool:
+    return name.startswith("gpt")
+
+
 def openai_chat_tool_call(tool_call: ToolCall) -> ChatCompletionMessageToolCall:
     return ChatCompletionMessageToolCall(
         type="function",
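These predicates are simple name-prefix checks; a quick sketch of the expected behavior (internal module, so the import path may change between versions):

```python
from inspect_ai.model._openai import is_gpt, is_o1, is_o3, is_o_series

assert is_o_series("o3-mini-2025-01-31")                 # o3 family
assert is_o1("o1-preview") and not is_o3("o1-preview")   # o1 family only
assert is_gpt("gpt-4o") and not is_o_series("gpt-4o")    # gpt family only
```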
@@ -296,6 +312,14 @@ def chat_messages_from_openai(
         else:
             content = [content_from_openai(c) for c in asst_content]
 
+        # resolve reasoning (OpenAI doesn't support this however OpenAI-compatible
+        # interfaces e.g. DeepSeek do include this field so we pluck it out)
+        reasoning = message.get("reasoning_content", None) or message.get(
+            "reasoning", None
+        )
+        if reasoning is not None:
+            reasoning = str(reasoning)
+
         # return message
         if "tool_calls" in message:
             tool_calls: list[ToolCall] = []
@@ -306,7 +330,11 @@ def chat_messages_from_openai(
         else:
             tool_calls = []
         chat_messages.append(
-            ChatMessageAssistant(content=content, tool_calls=tool_calls or None)
+            ChatMessageAssistant(
+                content=content,
+                tool_calls=tool_calls or None,
+                reasoning=reasoning,
+            )
         )
     elif message["role"] == "tool":
         tool_content = message.get("content", None) or ""
@@ -357,10 +385,14 @@ def chat_message_assistant_from_openai(
     message: ChatCompletionMessage, tools: list[ToolInfo]
 ) -> ChatMessageAssistant:
     refusal = getattr(message, "refusal", None)
+    reasoning = getattr(message, "reasoning_content", None) or getattr(
+        message, "reasoning", None
+    )
     return ChatMessageAssistant(
         content=refusal or message.content or "",
         source="generate",
         tool_calls=chat_tool_calls_from_openai(message, tools),
+        reasoning=reasoning,
     )
 
 
inspect_ai/model/_providers/anthropic.py CHANGED
@@ -12,6 +12,7 @@ else:
 
 from anthropic import (
     APIConnectionError,
+    APIStatusError,
     AsyncAnthropic,
     AsyncAnthropicBedrock,
     AsyncAnthropicVertex,
@@ -215,6 +216,17 @@ class AnthropicAPI(ModelAPI):
             # return output and call
             return output, model_call()
 
+        except APIStatusError as ex:
+            if ex.status_code == 413:
+                return ModelOutput.from_content(
+                    model=self.model_name,
+                    content=ex.message,
+                    stop_reason="model_length",
+                    error=ex.message,
+                ), model_call()
+            else:
+                raise ex
+
         except BadRequestError as ex:
             return self.handle_bad_request(ex), model_call()
 
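The same pattern in isolation: an HTTP 413 (request entity too large) from the Anthropic API is converted into a `ModelOutput` with `stop_reason="model_length"` rather than propagating as an exception. A sketch of the mapping (the helper name here is hypothetical; `AnthropicAPI` does this inline inside `generate()`):

```python
from anthropic import APIStatusError
from inspect_ai.model import ModelOutput

def output_for_413(model_name: str, ex: APIStatusError) -> ModelOutput:
    # 413 means the prompt exceeded the request size limit, which Inspect
    # treats as a model-length stop rather than a hard failure
    if ex.status_code == 413:
        return ModelOutput.from_content(
            model=model_name,
            content=ex.message,
            stop_reason="model_length",
            error=ex.message,
        )
    raise ex
```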
inspect_ai/model/_providers/groq.py CHANGED
@@ -294,8 +294,12 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCall]]:
 
 
 def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
+    reasoning = getattr(message, "reasoning", None)
+    if reasoning is not None:
+        reasoning = str(reasoning)
     return ChatMessageAssistant(
         content=message.content or "",
         source="generate",
         tool_calls=chat_tool_calls(message, tools),
+        reasoning=reasoning,
     )
inspect_ai/model/_providers/openai.py CHANGED
@@ -35,10 +35,12 @@ from .._model_output import (
     StopReason,
 )
 from .._openai import (
-    is_o1,
+    is_gpt,
     is_o1_full,
     is_o1_mini,
     is_o1_preview,
+    is_o3,
+    is_o_series,
     openai_chat_messages,
     openai_chat_tool_choice,
     openai_chat_tools,
@@ -140,8 +142,8 @@ class OpenAIAPI(ModelAPI):
     def is_azure(self) -> bool:
         return self.service == "azure"
 
-    def is_o1(self) -> bool:
-        return is_o1(self.model_name)
+    def is_o_series(self) -> bool:
+        return is_o_series(self.model_name)
 
     def is_o1_full(self) -> bool:
         return is_o1_full(self.model_name)
@@ -149,9 +151,15 @@ class OpenAIAPI(ModelAPI):
     def is_o1_mini(self) -> bool:
         return is_o1_mini(self.model_name)
 
+    def is_o3(self) -> bool:
+        return is_o3(self.model_name)
+
     def is_o1_preview(self) -> bool:
         return is_o1_preview(self.model_name)
 
+    def is_gpt(self) -> bool:
+        return is_gpt(self.model_name)
+
     async def generate(
         self,
         input: list[ChatMessage],
@@ -258,7 +266,7 @@ class OpenAIAPI(ModelAPI):
             model=self.model_name,
         )
         if config.max_tokens is not None:
-            if self.is_o1():
+            if self.is_o_series():
                 params["max_completion_tokens"] = config.max_tokens
             else:
                 params["max_tokens"] = config.max_tokens
@@ -273,10 +281,10 @@ class OpenAIAPI(ModelAPI):
         if config.seed is not None:
             params["seed"] = config.seed
         if config.temperature is not None:
-            if self.is_o1():
+            if self.is_o_series():
                 warn_once(
                     logger,
-                    "o1 models do not support the 'temperature' parameter (temperature is always 1).",
+                    "o series models do not support the 'temperature' parameter (temperature is always 1).",
                 )
             else:
                 params["temperature"] = config.temperature
@@ -293,9 +301,9 @@ class OpenAIAPI(ModelAPI):
             params["logprobs"] = config.logprobs
         if config.top_logprobs is not None:
             params["top_logprobs"] = config.top_logprobs
-        if tools and config.parallel_tool_calls is not None and not self.is_o1():
+        if tools and config.parallel_tool_calls is not None and not self.is_o_series():
             params["parallel_tool_calls"] = config.parallel_tool_calls
-        if config.reasoning_effort is not None and self.is_o1_full():
+        if config.reasoning_effort is not None and not self.is_gpt():
             params["reasoning_effort"] = config.reasoning_effort
 
         return params
@@ -312,7 +320,11 @@ class OpenAIAPI(ModelAPI):
         stop_reason: StopReason | None = None
         if e.code == "context_length_exceeded":
             stop_reason = "model_length"
-        elif e.code == "invalid_prompt":
+        elif (
+            e.code == "invalid_prompt"  # seems to happen for o1/o3
+            or e.code == "content_policy_violation"  # seems to happen for vision
+            or e.code == "content_filter"  # seems to happen on azure
+        ):
             stop_reason = "content_filter"
 
         if stop_reason:
inspect_ai/model/_reasoning.py ADDED
@@ -0,0 +1,17 @@
+import re
+from typing import NamedTuple
+
+
+class ContentWithReasoning(NamedTuple):
+    content: str
+    reasoning: str
+
+
+def parse_content_with_reasoning(content: str) -> ContentWithReasoning | None:
+    match = re.match(r"\s*<think>(.*?)</think>(.*)", content, re.DOTALL)
+    if match:
+        return ContentWithReasoning(
+            content=match.group(2).strip(), reasoning=match.group(1).strip()
+        )
+    else:
+        return None
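A usage sketch for the helper (internal module; `re.DOTALL` lets the reasoning span multiple lines, and the non-greedy `(.*?)` stops at the first closing tag):

```python
from inspect_ai.model._reasoning import parse_content_with_reasoning

parsed = parse_content_with_reasoning(
    "<think>\nAdd the exponents: 3 + 4 = 7.\n</think>\n2**3 * 2**4 == 2**7 == 128"
)
if parsed:
    print(parsed.reasoning)  # Add the exponents: 3 + 4 = 7.
    print(parsed.content)    # 2**3 * 2**4 == 2**7 == 128

# content without a leading <think> block returns None
assert parse_content_with_reasoning("plain answer") is None
```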