bridgic-llms-openai 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
"""
The OpenAI integration module provides support for the OpenAI API.

This module implements integration interfaces with OpenAI language models, supporting
calls to large language models provided by OpenAI, such as the GPT series, and providing
several wrappers for advanced functionality.

You can install the OpenAI integration package for Bridgic by running:

```shell
pip install bridgic-llms-openai
```
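
Once installed, the integration classes can be imported from this module
(a minimal sketch; the API key is a placeholder):

```python
from bridgic.llms.openai import OpenAILlm, OpenAIConfiguration

llm = OpenAILlm(api_key="your-api-key", configuration=OpenAIConfiguration())
```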
"""

from importlib.metadata import version
from ._openai_llm import OpenAIConfiguration, OpenAILlm

__version__ = version("bridgic-llms-openai")
__all__ = ["OpenAIConfiguration", "OpenAILlm", "__version__"]
@@ -0,0 +1,1127 @@
import json
import httpx
import warnings

from typing import List, Dict, Tuple, Optional, overload, Any, Union, Literal
from typing_extensions import override
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageFunctionToolCall, ChatCompletionNamedToolChoiceParam, ChatCompletionToolChoiceOptionParam
from openai.types.chat.chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam, Function
from pydantic import BaseModel
from openai import Stream, OpenAI, AsyncOpenAI
from openai.types.chat.chat_completion_message import ChatCompletionMessage
from openai.resources.chat.completions.completions import ChatCompletionMessageParam
from openai.types.chat.chat_completion_system_message_param import ChatCompletionSystemMessageParam
from openai.types.chat.chat_completion_user_message_param import ChatCompletionUserMessageParam
from openai.types.chat.chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam
from openai.types.chat.chat_completion_tool_message_param import ChatCompletionToolMessageParam

from bridgic.core.model import BaseLlm
from bridgic.core.model.types import *
from bridgic.core.model.protocols import StructuredOutput, ToolSelection, PydanticModel, JsonSchema, Constraint
from bridgic.core.utils._console import printer
from bridgic.core.utils._collection import filter_dict, merge_dict, validate_required_params
from bridgic.core.utils._tool_calling import generate_tool_id
from bridgic.llms.openai_like import OpenAILikeConfiguration

class OpenAIConfiguration(OpenAILikeConfiguration):
    """
    Configuration for OpenAI chat completions.
    """
    pass

class OpenAILlm(BaseLlm, StructuredOutput, ToolSelection):
    """
    Wrapper class for OpenAI, providing common chat and stream calling interfaces for OpenAI models
    and implementing the common protocols in the Bridgic framework.

    Parameters
    ----------
    api_key : str
        The API key for OpenAI services. Required for authentication.
    api_base : Optional[str]
        The base URL for the OpenAI API. If None, uses the default OpenAI endpoint.
    configuration : Optional[OpenAIConfiguration]
        The configuration for the OpenAI API. If None, uses the default configuration.
    timeout : Optional[float]
        Request timeout in seconds. If None, no timeout is applied.
    http_client : Optional[httpx.Client]
        Custom synchronous HTTP client for requests. If None, creates a default client.
    http_async_client : Optional[httpx.AsyncClient]
        Custom asynchronous HTTP client for requests. If None, creates a default client.

    Examples
    --------
    Basic usage for chat completion:

    ```python
    llm = OpenAILlm(api_key="your-api-key")
    messages = [Message.from_text("Hello!", role=Role.USER)]
    response = llm.chat(messages=messages, model="gpt-4")
    ```

    Structured output with a Pydantic model:

    ```python
    class Answer(BaseModel):
        reasoning: str
        result: int

    constraint = PydanticModel(model=Answer)
    structured_response = llm.structured_output(
        messages=messages,
        constraint=constraint,
        model="gpt-4"
    )
    ```

    Tool calling:

    ```python
    tools = [Tool(name="calculator", description="Calculate math", parameters={})]
    tool_calls, tool_call_response = llm.select_tool(messages=messages, tools=tools, model="gpt-4")
    ```
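
    Streaming a response chunk by chunk (a minimal sketch that reuses the `llm` and
    `messages` objects from above; the model name is illustrative):

    ```python
    for chunk in llm.stream(messages=messages, model="gpt-4"):
        print(chunk.delta, end="", flush=True)
    ```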
    """

    api_base: str
    api_key: str
    configuration: OpenAIConfiguration
    timeout: float
    http_client: httpx.Client
    http_async_client: httpx.AsyncClient

    client: OpenAI
    async_client: AsyncOpenAI

    def __init__(
        self,
        api_key: str,
        api_base: Optional[str] = None,
        configuration: Optional[OpenAIConfiguration] = OpenAIConfiguration(),
        timeout: Optional[float] = None,
        http_client: Optional[httpx.Client] = None,
        http_async_client: Optional[httpx.AsyncClient] = None,
    ):
        """
        Initialize the OpenAI LLM client with configuration parameters.

        Parameters
        ----------
        api_key : str
            The API key for OpenAI services. Required for authentication.
        api_base : Optional[str]
            The base URL for the OpenAI API. If None, uses the default OpenAI endpoint.
        configuration : Optional[OpenAIConfiguration]
            The configuration for the OpenAI API. If None, uses the default configuration.
        timeout : Optional[float]
            Request timeout in seconds. If None, no timeout is applied.
        http_client : Optional[httpx.Client]
            Custom synchronous HTTP client for requests. If None, creates a default client.
        http_async_client : Optional[httpx.AsyncClient]
            Custom asynchronous HTTP client for requests. If None, creates a default client.
        """
        # Record for serialization / deserialization.
        self.api_base = api_base
        self.api_key = api_key
        self.configuration = configuration
        self.timeout = timeout
        self.http_client = http_client
        self.http_async_client = http_async_client

        # Initialize clients.
        self.client = OpenAI(base_url=api_base, api_key=api_key, timeout=timeout, http_client=http_client)
        self.async_client = AsyncOpenAI(base_url=api_base, api_key=api_key, timeout=timeout, http_client=http_async_client)

    def chat(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        tools: Optional[List[Tool]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Response:
        """
        Send a synchronous chat completion request to OpenAI.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        model : str
            Model ID used to generate the response, like `gpt-4o` or `gpt-4`.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        max_tokens : Optional[int]
            The maximum number of tokens that can be generated in the chat completion.
            This value is now deprecated in favor of `max_completion_tokens`.
        stop : Optional[List[str]]
            Up to 4 sequences where the API will stop generating further tokens.
            Not supported with latest reasoning models `o3` and `o3-mini`.
        tools : Optional[List[Tool]]
            A list of tools to use in the chat completion.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        Response
            A response object containing the generated message and raw API response.
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            tools=tools,
            extra_body=extra_body,
            **kwargs,
        )
        # Validate required parameters for non-streaming chat completion
        validate_required_params(params, ["messages", "model"])

        response: ChatCompletion = self.client.chat.completions.create(**params)
        return self._handle_chat_response(response)

    def stream(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> StreamResponse:
        """
        Send a streaming chat completion request to OpenAI.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        model : str
            Model ID used to generate the response, like `gpt-4o` or `gpt-4`.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        max_tokens : Optional[int]
            The maximum number of tokens that can be generated in the chat completion.
            This value is now deprecated in favor of `max_completion_tokens`.
        stop : Optional[List[str]]
            Up to 4 sequences where the API will stop generating further tokens.
            Not supported with latest reasoning models `o3` and `o3-mini`.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Yields
        ------
        MessageChunk
            Individual chunks of the response as they are received from the API.
            Each chunk contains a delta (partial content) and the raw response.

        Notes
        -----
        This method enables real-time streaming of the model's response,
        useful for providing incremental updates to users as the response is generated.
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            extra_body=extra_body,
            stream=True,
            **kwargs,
        )
        # Validate required parameters for streaming chat completion
        validate_required_params(params, ["messages", "model", "stream"])

        response: Stream[ChatCompletionChunk] = self.client.chat.completions.create(**params)
        for chunk in response:
            if chunk.choices and chunk.choices[0].delta.content:
                delta_content = chunk.choices[0].delta.content
                delta_content = delta_content if delta_content else ""
                yield MessageChunk(delta=delta_content, raw=chunk)

    async def achat(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        tools: Optional[List[Tool]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Response:
        """
        Send an asynchronous chat completion request to OpenAI.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        model : str
            Model ID used to generate the response, like `gpt-4o` or `gpt-4`.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        max_tokens : Optional[int]
            The maximum number of tokens that can be generated in the chat completion.
            This value is now deprecated in favor of `max_completion_tokens`.
        stop : Optional[List[str]]
            Up to 4 sequences where the API will stop generating further tokens.
            Not supported with latest reasoning models `o3` and `o3-mini`.
        tools : Optional[List[Tool]]
            A list of tools to use in the chat completion.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        Response
            A response object containing the generated message and raw API response.

        Notes
        -----
        This is the asynchronous version of the chat method, suitable for
        concurrent processing and non-blocking I/O operations.
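
        Examples
        --------
        A minimal async sketch (assumes an existing `llm` instance and `messages` list;
        the model name is illustrative):

        ```python
        import asyncio

        async def main():
            response = await llm.achat(messages=messages, model="gpt-4o")
            print(response.message)

        asyncio.run(main())
        ```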
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            tools=tools,
            extra_body=extra_body,
            **kwargs,
        )
        # Validate required parameters for non-streaming chat completion
        validate_required_params(params, ["messages", "model"])

        response = await self.async_client.chat.completions.create(**params)
        return self._handle_chat_response(response)

    async def astream(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> AsyncStreamResponse:
        """
        Send an asynchronous streaming chat completion request to OpenAI.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        model : str
            Model ID used to generate the response, like `gpt-4o` or `gpt-4`.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        max_tokens : Optional[int]
            The maximum number of tokens that can be generated in the chat completion.
            This value is now deprecated in favor of `max_completion_tokens`.
        stop : Optional[List[str]]
            Up to 4 sequences where the API will stop generating further tokens.
            Not supported with latest reasoning models `o3` and `o3-mini`.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Yields
        ------
        MessageChunk
            Individual chunks of the response as they are received from the API.
            Each chunk contains a delta (partial content) and the raw response.

        Notes
        -----
        This is the asynchronous version of the stream method, suitable for
        concurrent processing and non-blocking streaming operations.
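
        Examples
        --------
        Consuming the stream with `async for` (a minimal sketch; assumes an existing
        `llm` instance and `messages` list, and the model name is illustrative):

        ```python
        async for chunk in llm.astream(messages=messages, model="gpt-4o"):
            print(chunk.delta, end="", flush=True)
        ```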
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            extra_body=extra_body,
            stream=True,
            **kwargs,
        )
        # Validate required parameters for streaming chat completion
        validate_required_params(params, ["messages", "model", "stream"])

        response = await self.async_client.chat.completions.create(**params)
        async for chunk in response:
            if chunk.choices and chunk.choices[0].delta.content:
                delta_content = chunk.choices[0].delta.content
                delta_content = delta_content if delta_content else ""
                yield MessageChunk(delta=delta_content, raw=chunk)

    def _build_parameters(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        tools: Optional[List[Tool]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        stream: Optional[bool] = None,
        response_format: Optional[Dict[str, Any]] = None,
        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] = None,
        parallel_tool_calls: Optional[bool] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        msgs: List[ChatCompletionMessageParam] = [self._convert_chat_completions_message(msg) for msg in messages]

        # Handle tools parameter - convert to JSON descriptions if provided, otherwise leave as None
        json_desc_tools = [self._convert_tool_to_json(tool) for tool in tools] if tools is not None else None

        # Build parameters dictionary and filter out None values
        # The priority order is as follows: configuration passed through the interface > configuration of the instance itself.
        merge_params = merge_dict(self.configuration.model_dump(), {
            "messages": msgs,
            "model": model,
            "temperature": temperature,
            "top_p": top_p,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            "max_tokens": max_tokens,
            "stop": stop,
            "tools": json_desc_tools,
            "extra_body": extra_body,
            "stream": stream,
            "response_format": response_format,
            "tool_choice": tool_choice,
            "parallel_tool_calls": parallel_tool_calls,
            **kwargs,
        })

        params = filter_dict(merge_params, exclude_none=True)
        return params

    def _handle_chat_response(self, response: ChatCompletion) -> Response:
        openai_message = response.choices[0].message
        text = openai_message.content if openai_message.content else ""

        if openai_message.refusal:
            warnings.warn(openai_message.refusal, RuntimeWarning)

        # Handle tool calls in the response
        # if openai_message.tool_calls:
        #     # Create a message with both text content and tool calls
        #     blocks = []
        #     if text:
        #         blocks.append(TextBlock(text=text))
        #     else:
        #         # Ensure there's always some text content, even if empty
        #         blocks.append(TextBlock(text=""))

        #     for tool_call in openai_message.tool_calls:
        #         tool_call_block = ToolCallBlock(
        #             id=tool_call.id,
        #             name=tool_call.function.name,
        #             arguments=json.loads(tool_call.function.arguments)
        #         )
        #         blocks.append(tool_call_block)

        #     message = Message(role=Role.AI, blocks=blocks)
        # else:
        #     # Regular text response
        #     message = Message.from_text(text, role=Role.AI)

        return Response(
            message=Message.from_text(text, role=Role.AI),
            raw=response,
        )

    def _convert_chat_completions_message(self, message: Message) -> ChatCompletionMessageParam:
        """
        Convert a Bridgic Message to OpenAI ChatCompletionMessageParam.

        This method handles different message types including:
        - Text messages
        - Messages with tool calls (ToolCallBlock)
        - Messages with tool results (ToolResultBlock)

        Parameters
        ----------
        message : Message
            The Bridgic message to convert

        Returns
        -------
        ChatCompletionMessageParam
            The converted OpenAI message parameter
        """
        # Extract text content from TextBlocks and ToolResultBlocks
        content_list = []
        for block in message.blocks:
            if isinstance(block, TextBlock):
                content_list.append(block.text)
            elif isinstance(block, ToolResultBlock):
                content_list.append(block.content)
        content_txt = "\n\n".join(content_list) if content_list else ""

        # Extract tool calls from ToolCallBlocks
        tool_calls = []
        for block in message.blocks:
            if isinstance(block, ToolCallBlock):
                tool_call = ChatCompletionMessageToolCallParam(
                    id=block.id,
                    type="function",
                    function=Function(
                        name=block.name,
                        arguments=json.dumps(block.arguments)
                    )
                )
                tool_calls.append(tool_call)

        # Handle different message roles
        if message.role == Role.SYSTEM:
            return ChatCompletionSystemMessageParam(content=content_txt, role="system", **message.extras)
        elif message.role == Role.USER:
            return ChatCompletionUserMessageParam(content=content_txt, role="user", **message.extras)
        elif message.role == Role.AI:
            # For AI messages, include tool calls if present
            if tool_calls:
                return ChatCompletionAssistantMessageParam(
                    content=content_txt,
                    role="assistant",
                    tool_calls=tool_calls,
                    **message.extras
                )
            else:
                return ChatCompletionAssistantMessageParam(content=content_txt, role="assistant", **message.extras)
        elif message.role == Role.TOOL:
            # For tool messages, extract tool_call_id from ToolResultBlock
            tool_call_id = None
            for block in message.blocks:
                if isinstance(block, ToolResultBlock):
                    tool_call_id = block.id
                    break

            if tool_call_id is None:
                raise ValueError("Tool message must contain a ToolResultBlock with an ID")

            return ChatCompletionToolMessageParam(
                content=content_txt,
                role="tool",
                tool_call_id=tool_call_id,
                **message.extras
            )
        else:
            raise ValueError(f"Invalid role: {message.role}")

    @overload
    def structured_output(
        self,
        messages: List[Message],
        constraint: PydanticModel,
        model: Optional[str] = None,
        temperature: Optional[float] = ...,
        top_p: Optional[float] = ...,
        presence_penalty: Optional[float] = ...,
        frequency_penalty: Optional[float] = ...,
        extra_body: Optional[Dict[str, Any]] = ...,
        **kwargs,
    ) -> BaseModel: ...

    @overload
    def structured_output(
        self,
        messages: List[Message],
        constraint: JsonSchema,
        model: Optional[str] = None,
        temperature: Optional[float] = ...,
        top_p: Optional[float] = ...,
        presence_penalty: Optional[float] = ...,
        frequency_penalty: Optional[float] = ...,
        extra_body: Optional[Dict[str, Any]] = ...,
        **kwargs,
    ) -> Dict[str, Any]: ...

    def structured_output(
        self,
        messages: List[Message],
        constraint: Union[PydanticModel, JsonSchema],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Union[BaseModel, Dict[str, Any]]:
        """
        Generate structured output in a specified format using OpenAI's structured output API.

        This method leverages OpenAI's structured output capabilities to ensure the model
        response conforms to a specified schema. Recommended for use with GPT-4o and later models.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        constraint : Constraint
            The constraint defining the desired output format (PydanticModel or JsonSchema).
        model : str
            Model ID used to generate the response. Structured outputs work best with GPT-4o and later.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        Union[BaseModel, Dict[str, Any]]
            The structured response in the format specified by the constraint:
            - BaseModel instance if constraint is PydanticModel
            - Dict[str, Any] if constraint is JsonSchema

        Examples
        --------
        Using a Pydantic model constraint:

        ```python
        class Answer(BaseModel):
            reasoning: str
            result: int

        constraint = PydanticModel(model=Answer)
        response = llm.structured_output(
            messages=[Message.from_text("What is 2+2?", role=Role.USER)],
            constraint=constraint,
            model="gpt-4o"
        )
        print(response.reasoning, response.result)
        ```

        Using a JSON schema constraint:

        ```python
        schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
        constraint = JsonSchema(schema=schema)
        response = llm.structured_output(
            messages=[Message.from_text("Hello", role=Role.USER)],
            constraint=constraint,
            model="gpt-4o"
        )
        print(response["answer"])
        ```

        Notes
        -----
        - Utilizes OpenAI's native structured output API with strict schema validation
        - All schemas automatically have additionalProperties set to False
        - Best performance achieved with GPT-4o and later models (gpt-4o-mini, gpt-4o-2024-08-06, and later)
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            extra_body=extra_body,
            response_format=self._get_response_format(constraint),
            **kwargs,
        )
        # Validate required parameters for structured output
        validate_required_params(params, ["messages", "model"])

        response = self.client.chat.completions.parse(**params)
        return self._convert_response(constraint, response.choices[0].message.content)

    async def astructured_output(
        self,
        messages: List[Message],
        constraint: Union[PydanticModel, JsonSchema],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Union[BaseModel, Dict[str, Any]]:
        """
        Asynchronously generate structured output in a specified format using OpenAI's API.

        This is the asynchronous version of structured_output, suitable for concurrent
        processing and non-blocking operations. It leverages OpenAI's structured output
        capabilities to ensure the model response conforms to a specified schema.

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far.
        constraint : Constraint
            The constraint defining the desired output format (PydanticModel or JsonSchema).
        model : str
            Model ID used to generate the response. Structured outputs work best with GPT-4o and later.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        Union[BaseModel, Dict[str, Any]]
            The structured response in the format specified by the constraint:
            - BaseModel instance if constraint is PydanticModel
            - Dict[str, Any] if constraint is JsonSchema

        Examples
        --------
        Using asynchronous structured output:

        ```python
        async def get_structured_response():
            llm = OpenAILlm(api_key="your-key")
            constraint = PydanticModel(model=Answer)
            response = await llm.astructured_output(
                messages=[Message.from_text("Calculate 5+3", role=Role.USER)],
                constraint=constraint,
                model="gpt-4o"
            )
            return response
        ```

        Notes
        -----
        - This is the asynchronous version of structured_output
        - Utilizes OpenAI's native structured output API with strict schema validation
        - Suitable for concurrent processing and high-throughput applications
        - Best performance achieved with GPT-4o and later models (gpt-4o-mini, gpt-4o-2024-08-06, and later)
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            extra_body=extra_body,
            response_format=self._get_response_format(constraint),
            **kwargs,
        )
        # Validate required parameters for structured output
        validate_required_params(params, ["messages", "model"])

        response = await self.async_client.chat.completions.parse(**params)
        return self._convert_response(constraint, response.choices[0].message.content)

    def _add_schema_properties(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """
        OpenAI requires additionalProperties to be set to False for all objects
        in structured output schemas. See:
        [AdditionalProperties False Must Always Be Set in Objects](https://platform.openai.com/docs/guides/structured-outputs?example=moderation#additionalproperties-false-must-always-be-set-in-objects)
        """
        schema["additionalProperties"] = False
        return schema

    def _get_response_format(self, constraint: Union[PydanticModel, JsonSchema]) -> Dict[str, Any]:
        if isinstance(constraint, PydanticModel):
            result = {
                "type": "json_schema",
                "json_schema": {
                    "schema": self._add_schema_properties(constraint.model.model_json_schema()),
                    "name": constraint.model.__name__,
                    "strict": True,
                },
            }
            return result
        elif isinstance(constraint, JsonSchema):
            return {
                "type": "json_schema",
                "json_schema": {
                    "schema": self._add_schema_properties(constraint.schema_dict),
                    # default name for schema
                    "name": "schema",
                    "strict": True,
                },
            }
        else:
            raise ValueError(f"Unsupported constraint type '{constraint.constraint_type}'. More info about OpenAI structured output: https://platform.openai.com/docs/guides/structured-outputs")

    def _convert_response(
        self,
        constraint: Union[PydanticModel, JsonSchema],
        content: str,
    ) -> Union[BaseModel, Dict[str, Any]]:
        if isinstance(constraint, PydanticModel):
            return constraint.model.model_validate_json(content)
        elif isinstance(constraint, JsonSchema):
            return json.loads(content)
        else:
            raise ValueError(f"Unsupported constraint type '{constraint.constraint_type}'. More info about OpenAI structured output: https://platform.openai.com/docs/guides/structured-outputs")

    def select_tool(
        self,
        messages: List[Message],
        tools: List[Tool],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        parallel_tool_calls: Optional[bool] = None,
        tool_choice: Optional[Union[Literal["auto", "required", "none"], ChatCompletionNamedToolChoiceParam]] = None,
        **kwargs,
    ) -> Tuple[List[ToolCall], Optional[str]]:
        """
        Select tools to call from a list based on the conversation context.

        This method enables the model to intelligently select appropriate tools to call
        from a provided list based on the conversation context. It supports OpenAI's
        function calling capabilities with parallel tool calls and various control options.

        More OpenAI information: [function-calling](https://platform.openai.com/docs/guides/function-calling)

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far, providing context for tool selection.
        tools : List[Tool]
            A list of tools the model may call.
        model : str
            Model ID used to generate the response. Function calling requires compatible models.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        parallel_tool_calls : Optional[bool]
            Whether to enable parallel function calling during tool use.
        tool_choice : Union[Literal["auto", "required", "none"], ChatCompletionNamedToolChoiceParam]
            Controls which tool, if any, the model may call.
            - `none`: The model will not call any tool and will instead generate a message. This is the default when no tools are provided.
            - `auto`: The model may choose to generate a message or call one or more tools. This is the default when tools are provided.
            - `required`: The model must call one or more tools.
            - To force a specific tool, pass `{"type": "function", "function": {"name": "my_function"}}`.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        List[ToolCall]
            List of selected tool calls with their IDs, names, and parsed arguments.
        Union[str, None]
            The content of the message from the model.
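
        Examples
        --------
        Forcing the model to call one specific tool via `tool_choice` (a minimal sketch;
        the tool definition and model name are illustrative):

        ```python
        tools = [Tool(name="calculator", description="Calculate math", parameters={})]
        tool_calls, content = llm.select_tool(
            messages=messages,
            tools=tools,
            model="gpt-4o",
            tool_choice={"type": "function", "function": {"name": "calculator"}},
        )
        for tool_call in tool_calls:
            print(tool_call.name, tool_call.arguments)
        ```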
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            tools=tools,
            tool_choice=tool_choice,
            parallel_tool_calls=parallel_tool_calls,
            extra_body=extra_body,
            **kwargs,
        )
        # Validate required parameters for tool selection
        validate_required_params(params, ["messages", "model"])

        response: ChatCompletion = self.client.chat.completions.create(**params)
        tool_calls = response.choices[0].message.tool_calls
        content = response.choices[0].message.content
        return (self._convert_tool_calls(tool_calls), content)

    async def aselect_tool(
        self,
        messages: List[Message],
        tools: List[Tool],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        parallel_tool_calls: Optional[bool] = None,
        tool_choice: Optional[Union[Literal["auto", "required", "none"], ChatCompletionNamedToolChoiceParam]] = None,
        **kwargs,
    ) -> Tuple[List[ToolCall], Optional[str]]:
        """
        Asynchronously select tools to call from a list based on the conversation context.

        This method enables the model to intelligently select appropriate tools to call
        from a provided list based on the conversation context. It supports OpenAI's
        function calling capabilities with parallel tool calls and various control options.

        More OpenAI information: [function-calling](https://platform.openai.com/docs/guides/function-calling)

        Parameters
        ----------
        messages : List[Message]
            A list of messages comprising the conversation so far, providing context for tool selection.
        tools : List[Tool]
            A list of tools the model may call.
        model : str
            Model ID used to generate the response. Function calling requires compatible models.
        temperature : Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
            make the output more random, while lower values like 0.2 will make it more
            focused and deterministic.
        top_p : Optional[float]
            An alternative to sampling with temperature, called nucleus sampling, where the
            model considers the results of the tokens with top_p probability mass.
        presence_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood to
            talk about new topics.
        frequency_penalty : Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's likelihood to
            repeat the same line verbatim.
        extra_body : Optional[Dict[str, Any]]
            Add additional JSON properties to the request.
        parallel_tool_calls : Optional[bool]
            Whether to enable parallel function calling during tool use.
        tool_choice : Union[Literal["auto", "required", "none"], ChatCompletionNamedToolChoiceParam]
            Controls which tool, if any, the model may call.
            - `none`: The model will not call any tool and will instead generate a message. This is the default when no tools are provided.
            - `auto`: The model may choose to generate a message or call one or more tools. This is the default when tools are provided.
            - `required`: The model must call one or more tools.
            - To force a specific tool, pass `{"type": "function", "function": {"name": "my_function"}}`.
        **kwargs
            Additional keyword arguments passed to the OpenAI API.

        Returns
        -------
        List[ToolCall]
            List of selected tool calls with their IDs, names, and parsed arguments.
        Union[str, None]
            The content of the message from the model.
        """
        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            tools=tools,
            tool_choice=tool_choice,
            parallel_tool_calls=parallel_tool_calls,
            extra_body=extra_body,
            **kwargs,
        )
        # Validate required parameters for tool selection
        validate_required_params(params, ["messages", "model"])

        response: ChatCompletion = await self.async_client.chat.completions.create(**params)
        tool_calls = response.choices[0].message.tool_calls
        content = response.choices[0].message.content
        return (self._convert_tool_calls(tool_calls), content)

    def _convert_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        return {
            "type": "object",
            "properties": parameters.get("properties", {}),
            "required": parameters.get("required", []),
            "additionalProperties": False
        }

    def _convert_tool_to_json(self, tool: Tool) -> Dict[str, Any]:
        return {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": self._convert_parameters(tool.parameters),
            }
        }

    def _convert_tool_calls(self, tool_calls: List[ChatCompletionMessageFunctionToolCall]) -> List[ToolCall]:
        return [] if tool_calls is None else [
            ToolCall(
                id=generate_tool_id(),
                name=tool_call.function.name,
                arguments=json.loads(tool_call.function.arguments),
            ) for tool_call in tool_calls
        ]

    @override
    def dump_to_dict(self) -> Dict[str, Any]:
        state_dict = {
            "api_base": self.api_base,
            "api_key": self.api_key,
            "timeout": self.timeout,
            "configuration": self.configuration.model_dump(),
        }
        if self.http_client:
            warnings.warn(
                "httpx.Client is not serializable, so it will be set to None in the deserialization.",
                RuntimeWarning,
            )
        if self.http_async_client:
            warnings.warn(
                "httpx.AsyncClient is not serializable, so it will be set to None in the deserialization.",
                RuntimeWarning,
            )
        return state_dict

    @override
    def load_from_dict(self, state_dict: Dict[str, Any]) -> None:
        self.api_base = state_dict["api_base"]
        self.api_key = state_dict["api_key"]
        self.timeout = state_dict["timeout"]
        self.configuration = OpenAIConfiguration(**state_dict.get("configuration", {}))
        self.http_client = None
        self.http_async_client = None

        self.client = OpenAI(
            base_url=self.api_base,
            api_key=self.api_key,
            timeout=self.timeout,
            http_client=self.http_client,
        )
        self.async_client = AsyncOpenAI(
            base_url=self.api_base,
            api_key=self.api_key,
            timeout=self.timeout,
            http_client=self.http_async_client,
        )
@@ -0,0 +1,32 @@
Metadata-Version: 2.4
Name: bridgic-llms-openai
Version: 0.1.2
Summary: OpenAI adapters for Bridgic.
Author-email: Tielei Zhang <zhangtl04@gmail.com>
License: MIT
License-File: LICENSE
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Python: >=3.9
Requires-Dist: bridgic-core>=0.1.1
Requires-Dist: bridgic-llms-openai-like>=0.1.1
Requires-Dist: httpx-aiohttp>=0.1.8
Requires-Dist: httpx>=0.28.1
Requires-Dist: openai>=1.60.0
Description-Content-Type: text/markdown

Bridgic LLMs Integration
========================

This package integrates OpenAI LLMs into the Bridgic framework, providing the basic chat/stream interfaces and implementing several protocols based on the OpenAI API.

Installation
------------

```shell
pip install bridgic-llms-openai
```
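
Quick Start
-----------

A minimal usage sketch (the API key and model name are placeholders; the
`Message`/`Role` import path is assumed to come from `bridgic-core`):

```python
from bridgic.core.model.types import Message, Role
from bridgic.llms.openai import OpenAILlm

llm = OpenAILlm(api_key="your-api-key")
response = llm.chat(
    messages=[Message.from_text("Hello!", role=Role.USER)],
    model="gpt-4o",
)
print(response.message)
```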
@@ -0,0 +1,6 @@
bridgic/llms/openai/__init__.py,sha256=6lT5zIGIZ5114_SFzgmtpzeYaq6fC2ug9duUJ-yOEpU,617
bridgic/llms/openai/_openai_llm.py,sha256=8iIVM96ONUwaZUCNXsN6ruEDuykY6AIFwTLiZ_2Unhg,49130
bridgic_llms_openai-0.1.2.dist-info/METADATA,sha256=jLzXiyQHNg091X81GNfc9QfZQjGr3BOs3tlciNl5MAw,1021
bridgic_llms_openai-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
bridgic_llms_openai-0.1.2.dist-info/licenses/LICENSE,sha256=f9RZk4nzmfthTiwLcCppWR0L-UolLD7J47uduHHeJhA,1108
bridgic_llms_openai-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,22 @@
MIT License

Copyright (c) 2025 北京比特天空科技有限公司 (BitSky Inc).

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.