inspect-ai 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. inspect_ai/_cli/eval.py +13 -0
  2. inspect_ai/_cli/view.py +4 -0
  3. inspect_ai/_display/textual/widgets/transcript.py +15 -9
  4. inspect_ai/_eval/task/error.py +10 -14
  5. inspect_ai/_eval/task/run.py +10 -8
  6. inspect_ai/_util/transcript.py +11 -0
  7. inspect_ai/_view/www/dist/assets/index.css +1 -0
  8. inspect_ai/_view/www/dist/assets/index.js +100 -94
  9. inspect_ai/_view/www/log-schema.json +35 -19
  10. inspect_ai/_view/www/src/components/ChatView.mjs +23 -0
  11. inspect_ai/_view/www/src/types/log.d.ts +6 -4
  12. inspect_ai/log/_recorders/eval.py +1 -1
  13. inspect_ai/model/_chat_message.py +27 -0
  14. inspect_ai/model/_conversation.py +10 -3
  15. inspect_ai/model/_generate_config.py +6 -0
  16. inspect_ai/model/_model.py +74 -0
  17. inspect_ai/model/_openai.py +33 -1
  18. inspect_ai/model/_providers/anthropic.py +12 -0
  19. inspect_ai/model/_providers/groq.py +4 -0
  20. inspect_ai/model/_providers/openai.py +21 -9
  21. inspect_ai/model/_providers/providers.py +1 -1
  22. inspect_ai/model/_reasoning.py +17 -0
  23. inspect_ai/solver/_basic_agent.py +19 -9
  24. inspect_ai/tool/beta/_computer/_resources/Dockerfile +4 -0
  25. inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  26. inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
  27. inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
  28. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
  29. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/METADATA +1 -1
  30. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/RECORD +34 -29
  31. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/LICENSE +0 -0
  32. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/WHEEL +0 -0
  33. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/entry_points.txt +0 -0
  34. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.62.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/log-schema.json
@@ -260,13 +260,26 @@
         ],
         "default": null,
         "title": "Tool Calls"
+      },
+      "reasoning": {
+        "anyOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": null,
+        "title": "Reasoning"
       }
     },
     "required": [
       "content",
       "source",
       "role",
-      "tool_calls"
+      "tool_calls",
+      "reasoning"
     ],
     "title": "ChatMessageAssistant",
     "type": "object",
@@ -486,7 +499,10 @@
       "tool_call_id": {
         "anyOf": [
           {
-            "type": "string"
+            "items": {
+              "type": "string"
+            },
+            "type": "array"
           },
           {
             "type": "null"
@@ -1131,7 +1147,6 @@
       "presence_penalty": null,
       "logit_bias": null,
       "seed": null,
-      "suffix": null,
       "top_k": null,
       "num_choices": null,
       "logprobs": null,
@@ -1140,7 +1155,8 @@
       "internal_tools": null,
       "max_tool_output": null,
       "cache_prompt": null,
-      "reasoning_effort": null
+      "reasoning_effort": null,
+      "reasoning_history": null
     }
   }
 },
@@ -2120,18 +2136,6 @@
         "default": null,
         "title": "Seed"
       },
-      "suffix": {
-        "anyOf": [
-          {
-            "type": "string"
-          },
-          {
-            "type": "null"
-          }
-        ],
-        "default": null,
-        "title": "Suffix"
-      },
       "top_k": {
         "anyOf": [
           {
@@ -2248,6 +2252,18 @@
         ],
         "default": null,
         "title": "Reasoning Effort"
+      },
+      "reasoning_history": {
+        "anyOf": [
+          {
+            "type": "boolean"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": null,
+        "title": "Reasoning History"
       }
     },
     "title": "GenerateConfig",
@@ -2266,7 +2282,6 @@
       "presence_penalty",
       "logit_bias",
       "seed",
-      "suffix",
       "top_k",
       "num_choices",
       "logprobs",
@@ -2275,7 +2290,8 @@
       "internal_tools",
       "max_tool_output",
       "cache_prompt",
-      "reasoning_effort"
+      "reasoning_effort",
+      "reasoning_history"
     ],
     "additionalProperties": false
   },
@@ -4247,9 +4263,9 @@
       "parallel_tool_calls": null,
       "presence_penalty": null,
       "reasoning_effort": null,
+      "reasoning_history": null,
       "seed": null,
       "stop_seqs": null,
-      "suffix": null,
       "system_message": null,
       "temperature": null,
       "timeout": null,
inspect_ai/_view/www/src/components/ChatView.mjs
@@ -8,6 +8,7 @@ import { ExpandablePanel } from "./ExpandablePanel.mjs";
 import { FontSize, TextStyle } from "../appearance/Fonts.mjs";
 import { resolveToolInput, ToolCallView } from "./Tools.mjs";
 import { VirtualList } from "./VirtualList.mjs";
+import { MarkdownDiv } from "./MarkdownDiv.mjs";

 /**
  * Renders the ChatViewVirtualList component.
@@ -282,7 +283,29 @@ const ChatMessage = ({
   <i class="${iconForMsg(message)}"></i>
   ${message.role}
 </div>
+
+${
+  message.role === "assistant" && message.reasoning
+    ? html` <div
+        style=${{
+          marginLeft: indented ? "1.1rem" : "0",
+          paddingBottom: "0.8rem",
+        }}
+      >
+        <div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>Reasoning</div>
+        <${ExpandablePanel} collapse=${true}><${MarkdownDiv} markdown=${message.reasoning}/></${ExpandablePanel}>
+      </div>`
+    : undefined
+}
+
 <div style=${{ marginLeft: indented ? "1.1rem" : "0", paddingBottom: indented ? "0.8rem" : "0" }}>
+  ${
+    message.role === "assistant" && message.reasoning
+      ? html`<div style=${{ ...TextStyle.label, ...TextStyle.secondary }}>
+          Response
+        </div>`
+      : ""
+  }
   <${ExpandablePanel} collapse=${collapse}>
     <${MessageContents}
       key=${`${id}-contents`}
inspect_ai/_view/www/src/types/log.d.ts
@@ -70,7 +70,6 @@ export type LogitBias = {
   [k: string]: number;
 } | null;
 export type Seed = number | null;
-export type Suffix = string | null;
 export type TopK = number | null;
 export type NumChoices = number | null;
 export type Logprobs = boolean | null;
@@ -80,6 +79,7 @@ export type InternalTools = boolean | null;
 export type MaxToolOutput = number | null;
 export type CachePrompt = "auto" | boolean | null;
 export type ReasoningEffort = ("low" | "medium" | "high") | null;
+export type ReasoningHistory = boolean | null;
 export type TotalSamples = number;
 export type CompletedSamples = number;
 export type Name3 = string;
@@ -133,7 +133,7 @@ export type Content1 =
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source1 = ("input" | "generate") | null;
 export type Role1 = "user";
-export type ToolCallId = string | null;
+export type ToolCallId = string[] | null;
 export type Content2 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -147,6 +147,7 @@ export type ParseError = string | null;
 export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
+export type Reasoning = string | null;
 export type Content4 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -547,7 +548,6 @@ export interface GenerateConfig {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -557,6 +557,7 @@ export interface GenerateConfig {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 export interface EvalResults {
   total_samples: TotalSamples;
@@ -658,6 +659,7 @@ export interface ChatMessageAssistant {
   source: Source2;
   role: Role2;
   tool_calls: ToolCalls;
+  reasoning: Reasoning;
 }
 export interface ToolCall {
   id: Id1;
@@ -901,7 +903,6 @@ export interface GenerateConfig1 {
   presence_penalty: PresencePenalty;
   logit_bias: LogitBias;
   seed: Seed;
-  suffix: Suffix;
   top_k: TopK;
   num_choices: NumChoices;
   logprobs: Logprobs;
@@ -911,6 +912,7 @@ export interface GenerateConfig1 {
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
   reasoning_effort: ReasoningEffort;
+  reasoning_history: ReasoningHistory;
 }
 /**
  * Model call (raw request/response data).
inspect_ai/log/_recorders/eval.py
@@ -203,7 +203,7 @@ class EvalRecorder(FileRecorder):
         # of small fetches from the zip file streams)
         temp_log: str | None = None
         fs = filesystem(location)
-        if not fs.is_local():
+        if not fs.is_local() and header_only is False:
             with tempfile.NamedTemporaryFile(delete=False) as temp:
                 temp_log = temp.name
                 fs.get_file(location, temp_log)
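
This one-line change means that when only the log header is requested, a remote log is no longer copied wholesale to a local temp file first. A usage sketch (the S3 location is hypothetical):

```python
from inspect_ai.log import read_eval_log

# header_only skips the samples; with 0.3.62 a remote location no longer
# triggers a full download of the log file just to read its header
log = read_eval_log("s3://my-bucket/logs/example_task.eval", header_only=True)
print(log.eval.model, log.status)
```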
inspect_ai/model/_chat_message.py
@@ -7,6 +7,8 @@ from inspect_ai._util.content import Content, ContentText
 from inspect_ai.tool import ToolCall
 from inspect_ai.tool._tool_call import ToolCallError

+from ._reasoning import parse_content_with_reasoning
+
 logger = getLogger(__name__)


@@ -83,6 +85,31 @@ class ChatMessageAssistant(ChatMessageBase):
     tool_calls: list[ToolCall] | None = Field(default=None)
     """Tool calls made by the model."""

+    reasoning: str | None = Field(default=None)
+    """Reasoning content."""
+
+    # Some OpenAI-compatible REST endpoints include reasoning as a field alongside
+    # content. However, since this field doesn't exist in the OpenAI interface,
+    # hosting providers (so far we've seen this with Together and Groq) may
+    # include the reasoning in a <think></think> tag before the main response.
+    # We expect this pattern to be repeated elsewhere, so include this hook to
+    # automatically extract the reasoning content when the response is prefaced
+    # with a <think> block. If this ends up being an overreach we can fall back
+    # to each provider manually parsing out <think> using a helper function.
+    # The implementation isn't important here; the critical thing to establish
+    # is that Inspect makes reasoning content available separately.
+    @model_validator(mode="before")
+    @classmethod
+    def extract_reasoning(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            content = data.get("content", None)
+            if isinstance(content, str):
+                parsed = parse_content_with_reasoning(content)
+                if parsed:
+                    data["reasoning"] = parsed.reasoning
+                    data["content"] = parsed.content
+        return data
+

 class ChatMessageTool(ChatMessageBase):
     role: Literal["tool"] = Field(default="tool")
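
A quick sketch of what the validator does at construction time (the field values shown are illustrative):

```python
from inspect_ai.model import ChatMessageAssistant

# content prefaced with a <think> block is split apart by the
# mode="before" validator into separate reasoning and content fields
msg = ChatMessageAssistant(
    content="<think>\nThe user wants a haiku.\n</think>\n\nHere is your haiku."
)
assert msg.reasoning == "The user wants a haiku."
assert msg.content == "Here is your haiku."
```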
inspect_ai/model/_conversation.py
@@ -2,7 +2,7 @@ from rich.console import RenderableType
 from rich.text import Text

 from inspect_ai._util.rich import lines_display
-from inspect_ai._util.transcript import transcript_markdown
+from inspect_ai._util.transcript import transcript_markdown, transcript_reasoning
 from inspect_ai.util._conversation import conversation_panel
 from inspect_ai.util._display import display_type

@@ -38,8 +38,15 @@ def conversation_assistant_message(
         content=transcript_markdown(m.text, escape=True),
     )

-    # start with assistant content
-    content: list[RenderableType] = (
+    # build content
+    content: list[RenderableType] = []
+
+    # reasoning
+    if message.reasoning:
+        content.extend(transcript_reasoning(message.reasoning))
+
+    # message text
+    content.extend(
         [transcript_markdown(message.text, escape=True)] if message.text else []
     )
inspect_ai/model/_generate_config.py
@@ -75,6 +75,9 @@ class GenerateConfigArgs(TypedDict, total=False):
     reasoning_effort: Literal["low", "medium", "high"] | None
     """Constrains effort on reasoning for reasoning models. OpenAI o1 models only."""

+    reasoning_history: bool | None
+    """Include reasoning in chat message history sent to generate."""
+

 class GenerateConfig(BaseModel):
     """Base class for model generation configs."""
@@ -145,6 +148,9 @@ class GenerateConfig(BaseModel):
     reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
     """Constrains effort on reasoning for reasoning models. OpenAI o1 models only."""

+    reasoning_history: bool | None = Field(default=None)
+    """Include reasoning in chat message history sent to generate."""
+
     def merge(
         self, other: Union["GenerateConfig", GenerateConfigArgs]
     ) -> "GenerateConfig":
inspect_ai/model/_model.py
@@ -168,6 +168,10 @@ class ModelAPI(abc.ABC):
         """Tool results can contain images"""
         return False

+    def has_reasoning_history(self) -> bool:
+        """Chat message assistant messages can include reasoning."""
+        return False
+

 class Model:
     """Model interface."""
@@ -302,6 +306,11 @@ class Model:
             tools = []
             tool_choice = "none"

+        # handle reasoning history
+        input = resolve_reasoning_history(
+            input, config, self.api.has_reasoning_history()
+        )
+
         # apply any tool model_input handlers
         input = resolve_tool_model_input(tdefs, input)

@@ -726,6 +735,71 @@ def simple_input_messages(
     return messages


+def resolve_reasoning_history(
+    messages: list[ChatMessage], config: GenerateConfig, api_has_reasoning_history: bool
+) -> list[ChatMessage]:
+    # determine if we are including reasoning history
+    reasoning_history = config.reasoning_history is not False
+
+    # determine up front if we have any reasoning content
+    have_reasoning = any(
+        [
+            isinstance(m, ChatMessageAssistant) and m.reasoning is not None
+            for m in messages
+        ]
+    )
+    if not have_reasoning:
+        return messages
+
+    # API assistant message format directly supports reasoning history, so we will:
+    # (a) remove reasoning content entirely if config says not to include it; or
+    # (b) leave the messages alone if config says to include it
+    if api_has_reasoning_history:
+        # remove reasoning history as per config
+        if not reasoning_history:
+            resolved_messages: list[ChatMessage] = []
+            for message in messages:
+                if isinstance(message, ChatMessageAssistant):
+                    resolved_messages.append(
+                        message.model_copy(update={"reasoning": None})
+                    )
+                else:
+                    resolved_messages.append(message)
+
+            return resolved_messages
+
+        # include reasoning history as per config
+        else:
+            return messages
+
+    # API can't represent reasoning natively so include <think> tags
+    elif reasoning_history:
+        resolved_messages = []
+        for message in messages:
+            if (
+                isinstance(message, ChatMessageAssistant)
+                and message.reasoning is not None
+            ):
+                message = deepcopy(message)
+                if isinstance(message.content, str):
+                    message.content = (
+                        f"<think>\n{message.reasoning}\n</think>\n\n{message.content}"
+                    )
+                else:
+                    message.content.insert(
+                        0, ContentText(text=f"<think>\n{message.reasoning}\n</think>\n")
+                    )
+                message.reasoning = None
+
+            resolved_messages.append(message)
+
+        return resolved_messages
+
+    # API doesn't handle reasoning and config says no reasoning_history, so nothing to do
+    else:
+        return messages
+
+
 def resolve_tool_model_input(
     tdefs: list[ToolDef], messages: list[ChatMessage]
 ) -> list[ChatMessage]:
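
To make the branches concrete, here is a sketch that calls the helper directly (it is module-private, so importing it this way is for illustration only) against an API without native reasoning support:

```python
from inspect_ai.model import ChatMessageAssistant, GenerateConfig
from inspect_ai.model._model import resolve_reasoning_history  # private helper

history = [ChatMessageAssistant(content="Paris.", reasoning="Capital of France.")]
resolved = resolve_reasoning_history(
    history, GenerateConfig(), api_has_reasoning_history=False
)
# reasoning_history defaults to None (treated as "include"), so the prior
# reasoning is folded back into content as a <think> block:
print(resolved[0].content)
# <think>
# Capital of France.
# </think>
#
# Paris.
```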
inspect_ai/model/_openai.py
@@ -43,10 +43,18 @@ from ._chat_message import (
 from ._model_output import ModelUsage, StopReason, as_stop_reason


+def is_o_series(name: str) -> bool:
+    return is_o1(name) or is_o3(name)
+
+
 def is_o1(name: str) -> bool:
     return name.startswith("o1")


+def is_o3(name: str) -> bool:
+    return name.startswith("o3")
+
+
 def is_o1_full(name: str) -> bool:
     return is_o1(name) and not is_o1_mini(name) and not is_o1_preview(name)

@@ -55,10 +63,18 @@ def is_o1_mini(name: str) -> bool:
     return name.startswith("o1-mini")


+def is_o3_mini(name: str) -> bool:
+    return name.startswith("o3-mini")
+
+
 def is_o1_preview(name: str) -> bool:
     return name.startswith("o1-preview")


+def is_gpt(name: str) -> bool:
+    return name.startswith("gpt")
+
+
 def openai_chat_tool_call(tool_call: ToolCall) -> ChatCompletionMessageToolCall:
     return ChatCompletionMessageToolCall(
         type="function",
@@ -296,6 +312,14 @@ def chat_messages_from_openai(
         else:
             content = [content_from_openai(c) for c in asst_content]

+        # resolve reasoning (OpenAI doesn't support this, however OpenAI-compatible
+        # interfaces e.g. DeepSeek do include this field so we pluck it out)
+        reasoning = message.get("reasoning_content", None) or message.get(
+            "reasoning", None
+        )
+        if reasoning is not None:
+            reasoning = str(reasoning)
+
         # return message
         if "tool_calls" in message:
             tool_calls: list[ToolCall] = []
@@ -306,7 +330,11 @@ def chat_messages_from_openai(
         else:
             tool_calls = []
         chat_messages.append(
-            ChatMessageAssistant(content=content, tool_calls=tool_calls or None)
+            ChatMessageAssistant(
+                content=content,
+                tool_calls=tool_calls or None,
+                reasoning=reasoning,
+            )
         )
     elif message["role"] == "tool":
         tool_content = message.get("content", None) or ""
@@ -357,10 +385,14 @@ def chat_message_assistant_from_openai(
     message: ChatCompletionMessage, tools: list[ToolInfo]
 ) -> ChatMessageAssistant:
     refusal = getattr(message, "refusal", None)
+    reasoning = getattr(message, "reasoning_content", None) or getattr(
+        message, "reasoning", None
+    )
     return ChatMessageAssistant(
         content=refusal or message.content or "",
         source="generate",
         tool_calls=chat_tool_calls_from_openai(message, tools),
+        reasoning=reasoning,
     )
inspect_ai/model/_providers/anthropic.py
@@ -12,6 +12,7 @@ else:

 from anthropic import (
     APIConnectionError,
+    APIStatusError,
     AsyncAnthropic,
     AsyncAnthropicBedrock,
     AsyncAnthropicVertex,
@@ -215,6 +216,17 @@ class AnthropicAPI(ModelAPI):
             # return output and call
             return output, model_call()

+        except APIStatusError as ex:
+            if ex.status_code == 413:
+                return ModelOutput.from_content(
+                    model=self.model_name,
+                    content=ex.message,
+                    stop_reason="model_length",
+                    error=ex.message,
+                ), model_call()
+            else:
+                raise ex
+
         except BadRequestError as ex:
             return self.handle_bad_request(ex), model_call()
inspect_ai/model/_providers/groq.py
@@ -294,8 +294,12 @@ def chat_tool_calls(message: Any, tools: list[ToolInfo]) -> Optional[List[ToolCall]]:


 def chat_message_assistant(message: Any, tools: list[ToolInfo]) -> ChatMessageAssistant:
+    reasoning = getattr(message, "reasoning", None)
+    if reasoning is not None:
+        reasoning = str(reasoning)
     return ChatMessageAssistant(
         content=message.content or "",
         source="generate",
         tool_calls=chat_tool_calls(message, tools),
+        reasoning=reasoning,
     )
inspect_ai/model/_providers/openai.py
@@ -35,10 +35,12 @@ from .._model_output import (
     StopReason,
 )
 from .._openai import (
-    is_o1,
+    is_gpt,
     is_o1_full,
     is_o1_mini,
     is_o1_preview,
+    is_o3,
+    is_o_series,
     openai_chat_messages,
     openai_chat_tool_choice,
     openai_chat_tools,
@@ -140,8 +142,8 @@ class OpenAIAPI(ModelAPI):
     def is_azure(self) -> bool:
         return self.service == "azure"

-    def is_o1(self) -> bool:
-        return is_o1(self.model_name)
+    def is_o_series(self) -> bool:
+        return is_o_series(self.model_name)

     def is_o1_full(self) -> bool:
         return is_o1_full(self.model_name)
@@ -149,9 +151,15 @@ class OpenAIAPI(ModelAPI):
     def is_o1_mini(self) -> bool:
         return is_o1_mini(self.model_name)

+    def is_o3(self) -> bool:
+        return is_o3(self.model_name)
+
     def is_o1_preview(self) -> bool:
         return is_o1_preview(self.model_name)

+    def is_gpt(self) -> bool:
+        return is_gpt(self.model_name)
+
     async def generate(
         self,
         input: list[ChatMessage],
@@ -258,7 +266,7 @@ class OpenAIAPI(ModelAPI):
             model=self.model_name,
         )
         if config.max_tokens is not None:
-            if self.is_o1():
+            if self.is_o_series():
                 params["max_completion_tokens"] = config.max_tokens
             else:
                 params["max_tokens"] = config.max_tokens
@@ -273,10 +281,10 @@ class OpenAIAPI(ModelAPI):
         if config.seed is not None:
             params["seed"] = config.seed
         if config.temperature is not None:
-            if self.is_o1():
+            if self.is_o_series():
                 warn_once(
                     logger,
-                    "o1 models do not support the 'temperature' parameter (temperature is always 1).",
+                    "o series models do not support the 'temperature' parameter (temperature is always 1).",
                 )
             else:
                 params["temperature"] = config.temperature
@@ -293,9 +301,9 @@ class OpenAIAPI(ModelAPI):
             params["logprobs"] = config.logprobs
         if config.top_logprobs is not None:
             params["top_logprobs"] = config.top_logprobs
-        if tools and config.parallel_tool_calls is not None and not self.is_o1():
+        if tools and config.parallel_tool_calls is not None and not self.is_o_series():
             params["parallel_tool_calls"] = config.parallel_tool_calls
-        if config.reasoning_effort is not None and self.is_o1_full():
+        if config.reasoning_effort is not None and not self.is_gpt():
             params["reasoning_effort"] = config.reasoning_effort

         return params
@@ -312,7 +320,11 @@ class OpenAIAPI(ModelAPI):
         stop_reason: StopReason | None = None
         if e.code == "context_length_exceeded":
             stop_reason = "model_length"
-        elif e.code == "invalid_prompt":
+        elif (
+            e.code == "invalid_prompt"  # seems to happen for o1/o3
+            or e.code == "content_policy_violation"  # seems to happen for vision
+            or e.code == "content_filter"  # seems to happen on azure
+        ):
             stop_reason = "content_filter"

         if stop_reason:
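
The helpers backing these predicates are simple model-name prefix checks (defined in the private `inspect_ai.model._openai` module, as shown earlier in this diff). A quick sketch:

```python
from inspect_ai.model._openai import is_gpt, is_o1, is_o3, is_o_series

assert is_o_series("o3-mini-2025-01-31")  # o1 or o3
assert is_o3("o3-mini") and not is_o1("o3-mini")
assert is_gpt("gpt-4o") and not is_o_series("gpt-4o")
```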
inspect_ai/model/_providers/providers.py
@@ -16,7 +16,7 @@ from .._registry import modelapi
 def groq() -> type[ModelAPI]:
     FEATURE = "Groq API"
     PACKAGE = "groq"
-    MIN_VERSION = "0.11.0"
+    MIN_VERSION = "0.16.0"

     # verify we have the package
     try:
inspect_ai/model/_reasoning.py (new file)
@@ -0,0 +1,17 @@
+import re
+from typing import NamedTuple
+
+
+class ContentWithReasoning(NamedTuple):
+    content: str
+    reasoning: str
+
+
+def parse_content_with_reasoning(content: str) -> ContentWithReasoning | None:
+    match = re.match(r"\s*<think>(.*?)</think>(.*)", content, re.DOTALL)
+    if match:
+        return ContentWithReasoning(
+            content=match.group(2).strip(), reasoning=match.group(1).strip()
+        )
+    else:
+        return None
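
A usage sketch for the new helper (also a private module):

```python
from inspect_ai.model._reasoning import parse_content_with_reasoning

parsed = parse_content_with_reasoning(
    "<think>\nWeighing the options...\n</think>\n\nGo with option B."
)
assert parsed is not None
assert parsed.reasoning == "Weighing the options..."
assert parsed.content == "Go with option B."

# content without a leading <think> block yields None
assert parse_content_with_reasoning("No reasoning here.") is None
```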