gllm-inference-binary 0.5.53__cp312-cp312-win_amd64.whl → 0.5.55__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gllm-inference-binary might be problematic.
- gllm_inference/constants.pyi +0 -1
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +92 -108
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +92 -109
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +51 -65
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +36 -36
- gllm_inference/lm_invoker/google_lm_invoker.pyi +108 -117
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +52 -64
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi +86 -106
- gllm_inference/lm_invoker/lm_invoker.pyi +1 -1
- gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +86 -105
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +157 -186
- gllm_inference/lm_invoker/portkey_lm_invoker.pyi +104 -68
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +92 -128
- gllm_inference/schema/__init__.pyi +3 -3
- gllm_inference/schema/attachment.pyi +1 -1
- gllm_inference/schema/enums.pyi +11 -0
- gllm_inference/schema/events.pyi +2 -2
- gllm_inference/schema/lm_output.pyi +167 -23
- gllm_inference.cp312-win_amd64.pyd +0 -0
- gllm_inference.pyi +0 -1
- {gllm_inference_binary-0.5.53.dist-info → gllm_inference_binary-0.5.55.dist-info}/METADATA +2 -2
- {gllm_inference_binary-0.5.53.dist-info → gllm_inference_binary-0.5.55.dist-info}/RECORD +24 -24
- {gllm_inference_binary-0.5.53.dist-info → gllm_inference_binary-0.5.55.dist-info}/WHEEL +0 -0
- {gllm_inference_binary-0.5.53.dist-info → gllm_inference_binary-0.5.55.dist-info}/top_level.txt +0 -0
gllm_inference/lm_invoker/openai_lm_invoker.pyi
@@ -76,237 +76,199 @@ class OpenAILMInvoker(BaseLMInvoker):
 result = await lm_invoker.invoke([text, image])
 ```

-
-
-
-
-`tool_calls` attribute in the output.
-
-Usage example:
-```python
-lm_invoker = OpenAILMInvoker(..., tools=[tool_1, tool_2])
-```
+Text output:
+The `OpenAILMInvoker` generates text outputs by default.
+Text outputs are stored in the `outputs` attribute of the `LMOutput` object and can be accessed
+via the `texts` (all text outputs) or `text` (first text output) properties.

 Output example:
 ```python
-LMOutput(
-response="Let me call the tools...",
-tool_calls=[
-ToolCall(id="123", name="tool_1", args={"key": "value"}),
-ToolCall(id="456", name="tool_2", args={"key": "value"}),
-]
-)
+LMOutput(outputs=[LMOutputItem(type="text", output="Hello, there!")])
 ```

 Structured output:
-
+The `OpenAILMInvoker` can be configured to generate structured outputs.
 This feature can be enabled by providing a schema to the `response_schema` parameter.

-
-
-For this reason, it is recommended to create the JSON schema using Pydantic\'s `model_json_schema` method.
-
-The language model also doesn\'t need to stream anything when structured output is enabled. Thus, standard
-invocation will be performed regardless of whether the `event_emitter` parameter is provided or not.
+Structured outputs are stored in the `outputs` attribute of the `LMOutput` object and can be accessed
+via the `structureds` (all structured outputs) or `structured` (first structured output) properties.

-
-1.
-
-
-
-
-
-schema = {
-"title": "Animal",
-"description": "A description of an animal.",
-"properties": {
-"color": {"title": "Color", "type": "string"},
-"name": {"title": "Name", "type": "string"},
-},
-"required": ["name", "color"],
-"type": "object",
-}
-lm_invoker = OpenAILMInvoker(..., response_schema=schema)
-```
-Output example:
-```python
-LMOutput(structured_output={"name": "Golden retriever", "color": "Golden"})
-```
+The schema must either be one of the following:
+1. A Pydantic BaseModel class
+The structured output will be a Pydantic model.
+2. A JSON schema dictionary
+JSON dictionary schema must be compatible with Pydantic\'s JSON schema, especially for complex schemas.
+Thus, it is recommended to create the JSON schema using Pydantic\'s `model_json_schema` method.
+The structured output will be a dictionary.

-# Example 2: Using a Pydantic BaseModel class
 Usage example:
 ```python
 class Animal(BaseModel):
 name: str
 color: str

-
+json_schema = Animal.model_json_schema()
+
+lm_invoker = OpenAILMInvoker(..., response_schema=Animal) # Using Pydantic BaseModel class
+lm_invoker = OpenAILMInvoker(..., response_schema=json_schema) # Using JSON schema dictionary
 ```
+
 Output example:
 ```python
-
+# Using Pydantic BaseModel class outputs a Pydantic model
+LMOutput(outputs=[LMOutputItem(type="structured", output=Animal(name="dog", color="white"))])
+
+# Using JSON schema dictionary outputs a dictionary
+LMOutput(outputs=[LMOutputItem(type="structured", output={"name": "dog", "color": "white"})])
 ```

-
-
-
-
-
-
-
+When structured output is enabled, streaming is disabled.
+
+Tool calling:
+The `OpenAILMInvoker` can be configured to call tools to perform certain tasks.
+This feature can be enabled by providing a list of `Tool` objects to the `tools` parameter.
+
+Tool calls outputs are stored in the `outputs` attribute of the `LMOutput` object and
+can be accessed via the `tool_calls` property.
+
+Usage example:
+```python
+lm_invoker = OpenAILMInvoker(..., tools=[tool_1, tool_2])
+```

 Output example:
 ```python
 LMOutput(
-
-
-
-
-
-output_token_details=OutputTokenDetails(reasoning_tokens=180, response_tokens=20),
-),
-duration=0.729,
-finish_details={"status": "completed", "incomplete_details": {"reason": None}},
+outputs=[
+LMOutputItem(type="text", output="I\'m using tools..."),
+LMOutputItem(type="tool_call", output=ToolCall(id="123", name="tool_1", args={"key": "value"})),
+LMOutputItem(type="tool_call", output=ToolCall(id="456", name="tool_2", args={"key": "value"})),
+]
 )
 ```

-
-The `OpenAILMInvoker`
-
-They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+MCP tool calling:
+The `OpenAILMInvoker` can be configured to call MCP tools to perform certain tasks.
+This feature can be enabled by providing a list of MCP servers to the `mcp_servers` parameter.

-
-
-retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
-retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
-retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
-retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
-```
+MCP calls outputs are stored in the `outputs` attribute of the `LMOutput` object and
+can be accessed via the `mcp_calls` property.

 Usage example:
 ```python
-
-```
+from gllm_inference.schema import MCPServer

-
-
-
-excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
-
-The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
-will guide the models on how many reasoning tokens it should generate before creating a response.
-Available options include:
-1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
-2. "low": Favors speed and economical token usage.
-3. "medium": Favors a balance between speed and reasoning accuracy.
-4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
-When not set, the reasoning effort will be equivalent to `medium` by default.
-
-OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
-generated. The summary level can be set via the `reasoning_summary` parameter. Available options include:
-1. "auto": The model decides the summary level automatically.
-2. "detailed": The model will generate a detailed summary of the reasoning tokens.
-Reasoning summary is not compatible with tool calling.
-When enabled, the reasoning summary will be stored in the `reasoning` attribute in the output.
+mcp_server_1 = MCPServer(url="https://mcp_server_1.com", name="mcp_server_1")
+lm_invoker = OpenAILMInvoker(..., mcp_servers=[mcp_server_1])
+```

 Output example:
 ```python
 LMOutput(
-
-
+outputs=[
+LMOutputItem(type="text", output="I\'m using MCP tools..."),
+LMOutputItem(
+type="mcp_call",
+output=MCPCall(
+id="123",
+server_name="mcp_server_1",
+tool_name="mcp_tool_1",
+args={"key": "value"},
+output="The result is 10."
+),
+),
+],
 )
 ```

 Streaming output example:
 ```python
-{"type": "
-{"type": "
-{"type": "
-{"type": "
-{"type": "response", "value": "Golden retriever ", ...}
-{"type": "response", "value": "is a good dog breed.", ...}
+{"type": "activity", "value": {"type": "mcp_list_tools", ...}, ...}
+{"type": "activity", "value": {"type": "mcp_call", ...}, ...}
+{"type": "response", "value": "The result ", ...}
+{"type": "response", "value": "is 10.", ...}
 ```
-Note: By default, the
+Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
 To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
 LM invoker initialization. The legacy event format support will be removed in v0.6.

-
+Reasoning:
+The `OpenAILMInvoker` performs step-by-step reasoning before generating a response when reasoning
+models are used, such as GPT-5 models and o-series models.

-
-
-
-
+The reasoning effort can be set via the `reasoning_effort` parameter, which guides the models on the amount
+of reasoning tokens to generate. Available options include `minimal`, `low`, `medium`, and `high`.
+
+While the raw reasoning tokens are not available, the summary of the reasoning tokens can still be generated.
+This can be done by passing the desired summary level via the `reasoning_summary` parameter.
+Available options include `auto` and `detailed`.
+
+Reasoning summaries are stored in the `outputs` attribute of the `LMOutput` object
+and can be accessed via the `thinkings` property.

 Usage example:
 ```python
-
-mcp_server_1 = MCPServer(
-url="https://mcp_server_1.com",
-name="mcp_server_1",
-)
-lm_invoker = OpenAILMInvoker(..., mcp_servers=[mcp_server_1])
+lm_invoker = OpenAILMInvoker(..., reasoning_effort="high", reasoning_summary="detailed")
 ```

 Output example:
 ```python
 LMOutput(
-
-
-
-
-server_name="mcp_server_1",
-tool_name="mcp_tool_1",
-args={"key": "value"},
-output="The result is 10.",
-),
-],
+outputs=[
+LMOutputItem(type="thinking", output=Reasoning(type="thinking", reasoning="I\'m thinking...", ...)),
+LMOutputItem(type="text", output="Golden retriever is a good dog breed."),
+]
 )
 ```

 Streaming output example:
 ```python
-{"type": "
-{"type": "
-{"type": "
-{"type": "
+{"type": "thinking_start", "value": "", ...}
+{"type": "thinking", "value": "I\'m ", ...}
+{"type": "thinking", "value": "thinking...", ...}
+{"type": "thinking_end", "value": "", ...}
+{"type": "response", "value": "Golden retriever ", ...}
+{"type": "response", "value": "is a good dog breed.", ...}
 ```
-Note: By default, the
+Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
 To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
 LM invoker initialization. The legacy event format support will be removed in v0.6.

+Reasoning summary is not compatible with tool calling.
+
 Code interpreter:
-The
-
+The `OpenAILMInvoker` can be configured to write and run Python code in a sandboxed environment.
+This is useful for solving complex problems in domains like data analysis, coding, and math.
 This feature can be enabled by setting the `code_interpreter` parameter to `True`.

-Usage example:
-```python
-lm_invoker = OpenAILMInvoker(..., code_interpreter=True)
-```
-
 When code interpreter is enabled, it is highly recommended to instruct the model to use the "python tool"
 in the system message, as "python tool" is the term recognized by the model to refer to the code interpreter.

-
+Code execution results are stored in the `outputs` attribute of the `LMOutput` object and
+can be accessed via the `code_exec_results` property.
+
+Usage example:
 ```python
+lm_invoker = OpenAILMInvoker(..., code_interpreter=True)
 messages = [
 Message.system("You are a data analyst. Use the python tool to generate a file."]),
 Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
 ]
+result = await lm_invoker.invoke(messages)
 ```

-When code interpreter is enabled, the code execution results are stored in the `code_exec_results`
-attribute in the output.
-
 Output example:
 ```python
 LMOutput(
-
-
-
-
-
-
+outputs=[
+LMOutputItem(type="text", output="The histogram is attached."),
+LMOutputItem(
+type="code_exec_result",
+output=CodeExecResult(
+id="123",
+code="import matplotlib.pyplot as plt...",
+output=[Attachment(data=b"...", mime_type="image/png")],
+),
 ),
 ],
 )
@@ -325,35 +287,24 @@ class OpenAILMInvoker(BaseLMInvoker):
 To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
 LM invoker initialization. The legacy event format support will be removed in v0.6.

-Web
-The
+Web Search:
+The `OpenAILMInvoker` can be configured to search the web for relevant information.
 This feature can be enabled by setting the `web_search` parameter to `True`.

+Web search citations are stored in the `outputs` attribute of the `LMOutput` object and
+can be accessed via the `citations` property.
+
 Usage example:
 ```python
 lm_invoker = OpenAILMInvoker(..., web_search=True)
 ```

-When web search is enabled, the language model will search the web for relevant information and may cite the
-relevant sources. The citations will be stored as `Chunk` objects in the `citations` attribute in the output.
-The content of the `Chunk` object is the type of the citation, e.g. "url_citation".
-
 Output example:
 ```python
 LMOutput(
-
-
-
-id="123",
-content="url_citation",
-metadata={
-"start_index": 164,
-"end_index": 275,
-"title": "Example title",
-"url": "https://www.example.com",
-"type": "url_citation",
-},
-),
+outputs=[
+LMOutputItem(type="citation", output=Chunk(id="123", content="...", metadata={...}, score=None)),
+LMOutputItem(type="text", output="According to recent reports... ([Source](https://example.com))."),
 ],
 )
 ```
@@ -361,27 +312,47 @@ class OpenAILMInvoker(BaseLMInvoker):
 Streaming output example:
 ```python
 {"type": "activity", "value": {"query": "search query"}, ...}
-{"type": "response", "value": "
-{"type": "response", "value": "
+{"type": "response", "value": "According to recent ", ...}
+{"type": "response", "value": "reports... ([Source](https://example.com)).", ...}
 ```
 Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
 To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
 LM invoker initialization. The legacy event format support will be removed in v0.6.

-
-The
-
-
-
-
-
-
-
-
-
-
-
-
+Analytics tracking:
+The `OpenAILMInvoker` can be configured to output additional information about the invocation.
+This feature can be enabled by setting the `output_analytics` parameter to `True`.
+
+When enabled, the following attributes will be stored in the output:
+1. `token_usage`: The token usage.
+2. `duration`: The duration in seconds.
+3. `finish_details`: The details about how the generation finished.
+
+Output example:
+```python
+LMOutput(
+outputs=[...],
+token_usage=TokenUsage(input_tokens=100, output_tokens=50),
+duration=0.729,
+finish_details={"stop_reason": "end_turn"},
+)
+```
+
+Retry and timeout:
+The `OpenAILMInvoker` supports retry and timeout configuration.
+By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+Retry config examples:
+```python
+retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
+retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
+```
+
+Usage example:
+```python
+lm_invoker = OpenAILMInvoker(..., retry_config=retry_config)
+```
 '''
 client_kwargs: Incomplete
 def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False, simplify_events: bool = False) -> None:
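For orientation, here is a minimal usage sketch that connects the updated docstring to the constructor signature above. It is illustrative only and not part of the package diff: the invoker's import path is taken from the file list, while the `Message` import, the model name, and the printed properties are assumptions based on the docstring excerpts.

```python
# Illustrative sketch only; not part of the gllm-inference-binary diff above.
import asyncio

from pydantic import BaseModel

# Import path assumed from the file listing (gllm_inference/lm_invoker/openai_lm_invoker.pyi).
from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker
# Assumed export, mirroring the `from gllm_inference.schema import MCPServer` line in the docstring.
from gllm_inference.schema import Message


class Animal(BaseModel):
    """Schema passed to `response_schema`, per the structured output section of the docstring."""
    name: str
    color: str


async def main() -> None:
    lm_invoker = OpenAILMInvoker(
        model_name="gpt-5",        # illustrative model name
        response_schema=Animal,    # structured output returned as a Pydantic model
        output_analytics=True,     # populates token_usage, duration, and finish_details
    )
    result = await lm_invoker.invoke([Message.user("Describe a golden retriever.")])
    print(result.structured)   # first structured output (an Animal instance), per the docstring
    print(result.token_usage)  # analytics attribute, populated because output_analytics=True


asyncio.run(main())
```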
|