agent-runtime-core 0.8.0-py3-none-any.whl → 0.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -104,9 +104,27 @@ class OpenAIClient(LLMClient):
         tools: Optional[list[dict]] = None,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
+        thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
         **kwargs,
     ) -> LLMResponse:
-        """Generate a completion from OpenAI."""
+        """
+        Generate a completion from OpenAI.
+
+        Args:
+            messages: List of messages in framework-neutral format
+            model: Model ID to use (defaults to self.default_model)
+            stream: Whether to stream (not used here, use stream() method)
+            tools: List of tools in OpenAI format
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            thinking: Enable reasoning mode for o-series and GPT-5 models
+            reasoning_effort: Reasoning effort level: "low", "medium", or "high"
+            **kwargs: Additional parameters passed to the API
+
+        Returns:
+            LLMResponse with the generated message
+        """
         model = model or self.default_model
 
         request_kwargs = {
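
From calling code, the new parameters would be exercised roughly like this (a minimal sketch: the constructor arguments, the message dict shape, and the model name are assumptions, since only the complete() signature appears in this diff):

    # Inside an async function; constructor args are hypothetical.
    client = OpenAIClient(api_key="sk-...")

    response = await client.complete(
        messages=[{"role": "user", "content": "Plan the migration to 0.9.1."}],
        model="o3-mini",          # a reasoning-capable model (assumption)
        thinking=True,            # opt in to reasoning mode
        reasoning_effort="high",  # one of "low", "medium", "high"
    )
    # response is an LLMResponse with the generated message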
@@ -121,6 +139,15 @@ class OpenAIClient(LLMClient):
         if max_tokens is not None:
             request_kwargs["max_tokens"] = max_tokens
 
+        # Handle reasoning mode for o-series and GPT-5 models
+        if thinking or reasoning_effort:
+            # reasoning_effort controls how much reasoning the model does
+            # Valid values: "low", "medium", "high"
+            effort = reasoning_effort or "medium"
+            if effort not in ("low", "medium", "high"):
+                effort = "medium"
+            request_kwargs["reasoning_effort"] = effort
+
         request_kwargs.update(kwargs)
 
         response = await self._client.chat.completions.create(**request_kwargs)
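
The fallback behavior of the new block is easiest to see in isolation; a self-contained sketch of the same logic (the helper name is invented for illustration):

    from typing import Optional

    def _normalize_effort(thinking: bool, reasoning_effort: Optional[str]) -> Optional[str]:
        # No reasoning requested: leave reasoning_effort out of the request entirely.
        if not (thinking or reasoning_effort):
            return None
        effort = reasoning_effort or "medium"
        # Unrecognized values fall back to "medium" rather than raising.
        return effort if effort in ("low", "medium", "high") else "medium"

    assert _normalize_effort(True, None) == "medium"       # thinking alone defaults to medium
    assert _normalize_effort(False, "high") == "high"      # an effort alone implies reasoning
    assert _normalize_effort(True, "maximum") == "medium"  # invalid value is coerced, not rejected
    assert _normalize_effort(False, None) is None          # feature stays off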
@@ -146,9 +173,24 @@ class OpenAIClient(LLMClient):
         *,
         model: Optional[str] = None,
         tools: Optional[list[dict]] = None,
+        thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
         **kwargs,
     ) -> AsyncIterator[LLMStreamChunk]:
-        """Stream a completion from OpenAI."""
+        """
+        Stream a completion from OpenAI.
+
+        Args:
+            messages: List of messages
+            model: Model ID to use
+            tools: List of tools in OpenAI format
+            thinking: Enable reasoning mode for o-series and GPT-5 models
+            reasoning_effort: Reasoning effort level: "low", "medium", or "high"
+            **kwargs: Additional parameters
+
+        Yields:
+            LLMStreamChunk with delta content
+        """
         model = model or self.default_model
 
         request_kwargs = {
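
Usage on the streaming side mirrors complete() (again a sketch; the message shape and model name are assumptions):

    async for chunk in client.stream(
        messages=[{"role": "user", "content": "Summarize the changelog."}],
        model="o3-mini",
        reasoning_effort="low",  # passing an effort implies reasoning; thinking=True is not required
    ):
        print(chunk)  # LLMStreamChunk carrying delta content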
@@ -160,6 +202,13 @@ class OpenAIClient(LLMClient):
         if tools:
             request_kwargs["tools"] = tools
 
+        # Handle reasoning mode for o-series and GPT-5 models
+        if thinking or reasoning_effort:
+            effort = reasoning_effort or "medium"
+            if effort not in ("low", "medium", "high"):
+                effort = "medium"
+            request_kwargs["reasoning_effort"] = effort
+
         request_kwargs.update(kwargs)
 
         async with await self._client.chat.completions.create(**request_kwargs) as stream:
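
Taken together, a stream() call with reasoning enabled hands chat.completions.create a request shaped roughly like this (a sketch: stream=True is presumably set by surrounding code these hunks don't show, and "reasoning_effort" here is the Chat Completions parameter of the same name):

    request_kwargs = {
        "model": "o3-mini",
        "messages": [{"role": "user", "content": "..."}],
        "stream": True,                # assumed, set outside the hunks shown
        "reasoning_effort": "medium",  # injected by the new block
    }
    # request_kwargs.update(kwargs) then merges any extra caller-supplied API params last.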