gllm-inference-binary 0.5.38-cp312-cp312-win_amd64.whl → 0.5.41-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gllm-inference-binary might be problematic.
- gllm_inference/em_invoker/azure_openai_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/bedrock_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/google_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/openai_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/voyage_em_invoker.pyi +2 -2
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +22 -28
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +24 -29
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +10 -20
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +11 -21
- gllm_inference/lm_invoker/google_lm_invoker.pyi +46 -28
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +10 -20
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi +25 -30
- gllm_inference/lm_invoker/lm_invoker.pyi +4 -1
- gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +22 -28
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +4 -1
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +45 -50
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +26 -42
- gllm_inference/schema/events.pyi +15 -15
- gllm_inference/schema/lm_output.pyi +4 -0
- gllm_inference.cp312-win_amd64.pyd +0 -0
- gllm_inference.pyi +1 -1
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/METADATA +1 -1
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/RECORD +26 -26
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/WHEEL +0 -0
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/top_level.txt +0 -0
gllm_inference/lm_invoker/litellm_lm_invoker.pyi CHANGED

@@ -2,7 +2,7 @@ from _typeshed import Incomplete
 from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
-from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker as OpenAICompatibleLMInvoker
+from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import OpenAIChatCompletionsLMInvoker as OpenAIChatCompletionsLMInvoker
 from gllm_inference.lm_invoker.openai_lm_invoker import ReasoningEffort as ReasoningEffort
 from gllm_inference.schema import AttachmentType as AttachmentType, LMOutput as LMOutput, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema
 from langchain_core.tools import Tool as LangChainTool
@@ -10,7 +10,7 @@ from typing import Any
 
 SUPPORTED_ATTACHMENTS: Incomplete
 
-class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
+class LiteLLMLMInvoker(OpenAIChatCompletionsLMInvoker):
     '''A language model invoker to interact with language models using LiteLLM.
 
     Attributes:
@@ -156,9 +156,9 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
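For orientation, a minimal sketch (not part of the diff) of passing a `RetryConfig` from the examples above into the invoker; the import paths follow the file list at the top, and the model id is a placeholder:

```python
from gllm_core.utils.retry import RetryConfig
from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker

# 5 retries with a 10.0-second timeout, matching the docstring examples above.
retry_config = RetryConfig(max_retries=5, timeout=10.0)

# "openai/gpt-4o" is a placeholder LiteLLM-style model id, not taken from this diff.
invoker = LiteLLMLMInvoker(model_id="openai/gpt-4o", retry_config=retry_config)
```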
@@ -192,44 +192,36 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
-    will be streamed with the `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
+    ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
 
     Output types:
     The output of the `LiteLLMLMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
     '''
     completion: Incomplete
-    def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         """Initializes a new instance of the LiteLLMLMInvoker class.
 
         Args:
@@ -246,4 +238,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models.
                 Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         """
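The new `simplify_events` flag threads through every invoker below. A hedged sketch of opting in and dispatching the simplified event shapes documented above; how the events are delivered (e.g. via `EventEmitter`) is outside what this diff shows:

```python
from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker

# Opt in to the simplified streamed event format; the legacy EventType.DATA
# envelope remains the default until v0.6. Model id is a placeholder.
invoker = LiteLLMLMInvoker(model_id="openai/gpt-4o", simplify_events=True)

def handle_event(event: dict) -> None:
    """Dispatch on the simplified event shapes shown in the docstring above."""
    if event["type"] in ("thinking_start", "thinking", "thinking_end"):
        print(f"[{event['type']}] {event['value']}")
    elif event["type"] == "response":
        print(event["value"], end="")
```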
gllm_inference/lm_invoker/lm_invoker.pyi CHANGED

@@ -56,7 +56,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
     response_schema: Incomplete
     output_analytics: Incomplete
     retry_config: Incomplete
-    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:
+    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, simplify_events: bool = False) -> None:
         """Initializes a new instance of the BaseLMInvoker class.
 
         Args:
@@ -73,6 +73,9 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
             output_analytics (bool, optional): Whether to output the invocation analytics. Defaults to False.
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         """
     @property
     def model_id(self) -> str:
gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi CHANGED

@@ -171,9 +171,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -207,44 +207,35 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
-    will be streamed with the `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
     Output types:
     The output of the `OpenAIChatCompletionsLMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
     '''
     client_kwargs: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAIChatCompletionsLMInvoker class.
 
         Args:
@@ -266,6 +257,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         '''
     def set_response_schema(self, response_schema: ResponseSchema | None) -> None:
         """Sets the response schema for the OpenAI language model.
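`set_response_schema` above allows swapping the schema after construction. A minimal sketch, assuming a Pydantic model is an accepted `ResponseSchema` (consistent with `structured_output` being typed `dict[str, Any] | BaseModel | None` above); the model name is a placeholder:

```python
from pydantic import BaseModel
from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import OpenAIChatCompletionsLMInvoker

class Answer(BaseModel):  # assumed to qualify as a ResponseSchema
    answer: str
    confidence: float

invoker = OpenAIChatCompletionsLMInvoker(model_name="gpt-4o")
invoker.set_response_schema(Answer)  # subsequent LMOutput.structured_output may be an Answer
invoker.set_response_schema(None)    # clears the schema again
```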
gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi CHANGED

@@ -25,7 +25,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):
 
     This class is deprecated and will be removed in v0.6. Please use the `OpenAIChatCompletionsLMInvoker` class instead.
     """
-    def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAICompatibleLMInvoker class.
 
         Args:
@@ -46,4 +46,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         '''
gllm_inference/lm_invoker/openai_lm_invoker.pyi CHANGED

@@ -11,7 +11,8 @@ from langchain_core.tools import Tool as LangChainTool
 from typing import Any
 
 SUPPORTED_ATTACHMENTS: Incomplete
-…
+STREAM_DATA_START_TYPE_MAP: Incomplete
+STREAM_DATA_END_TYPE_MAP: Incomplete
 STREAM_DATA_CONTENT_TYPE_MAP: Incomplete
 
 class OpenAILMInvoker(BaseLMInvoker):
@@ -176,9 +177,9 @@ class OpenAILMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -216,18 +217,18 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
-    `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
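For contrast with the removed `EventType.DATA` wording, a hedged sketch of the two event shapes as dict literals; the legacy field names are inferred from the removed `XAILMInvoker` streaming example further down and may not match every payload:

```python
import json

# Legacy envelope (default until v0.6): the typed payload is JSON-serialized
# inside a generic "data" event.
legacy_event = {
    "type": "data",
    "value": json.dumps({"data_type": "thinking", "data_value": "Let me think "}),
}

# Simplified format (simplify_events=True): the event type is carried directly.
simple_event = {"type": "thinking", "value": "Let me think "}
```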
@@ -262,14 +263,16 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
+    {"type": "activity", "value": {"type": "mcp_list_tools", ...}, ...}
+    {"type": "activity", "value": {"type": "mcp_call", ...}, ...}
     {"type": "response", "value": "The result ", ...}
     {"type": "response", "value": "is 10.", ...}
     ```
+    Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Code interpreter:
     The code interpreter is a feature that allows the language model to write and run Python code in a
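A small sketch of consuming the simplified MCP activity events shown above; only the dict shapes come from this diff:

```python
def handle_mcp_event(event: dict) -> None:
    """Log MCP activity events and print response tokens."""
    if event["type"] == "activity":
        activity = event["value"]  # e.g. {"type": "mcp_list_tools", ...}
        print("MCP activity:", activity.get("type"))
    elif event["type"] == "response":
        print(event["value"], end="")
```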
@@ -287,14 +290,8 @@ class OpenAILMInvoker(BaseLMInvoker):
     Messages example:
     ```python
     messages = [
-        Message(
-            role=MessageRole.SYSTEM,
-            contents=["You are a data analyst. Use the python tool to generate a file."],
-        ),
-        Message(
-            role=MessageRole.USER,
-            contents=["Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"],
-        ),
+        Message.system("You are a data analyst. Use the python tool to generate a file."),
+        Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
     ]
     ```
 
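The rewritten example above replaces the verbose `Message(role=..., contents=[...])` construction with the `Message.system` / `Message.user` helpers. A sketch of pairing them with the code interpreter; the `Message` import location is assumed and the model name is a placeholder:

```python
from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker
from gllm_inference.schema import Message  # import location assumed

invoker = OpenAILMInvoker(model_name="gpt-4o", code_interpreter=True)
messages = [
    Message.system("You are a data analyst. Use the python tool to generate a file."),
    Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
]
```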
@@ -315,16 +312,18 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the executed code will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "code_start", "value": "", ...}
+    {"type": "code", "value": "import matplotlib", ...}
+    {"type": "code", "value": ".pyplot as plt...", ...}
+    {"type": "code_end", "value": "", ...}
     {"type": "response", "value": "The histogram ", ...}
     {"type": "response", "value": "is attached.", ...}
     ```
+    Note: By default, the code token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Web search:
     The web search is a feature that allows the language model to search the web for relevant information.
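A hedged sketch of reassembling the streamed interpreter code from the `code_start` / `code` / `code_end` shapes above:

```python
code_buffer: list[str] = []

def handle_code_event(event: dict) -> None:
    """Accumulate streamed code between code_start and code_end."""
    if event["type"] == "code_start":
        code_buffer.clear()
    elif event["type"] == "code":
        code_buffer.append(event["value"])
    elif event["type"] == "code_end":
        print("executed code:\n" + "".join(code_buffer))
    elif event["type"] == "response":
        print(event["value"], end="")
```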
@@ -359,40 +358,33 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the web search activities will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
+    {"type": "activity", "value": {"query": "search query"}, ...}
     {"type": "response", "value": "The winner of the match ", ...}
     {"type": "response", "value": "is team A ([Example title](https://www.example.com)).", ...}
     ```
+    Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Output types:
     The output of the `OpenAILMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.…
-        2.…
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_summary` parameter is provided
-            for reasoning models. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
-            to cite the relevant sources. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
-            enabled and the language model decides to execute any codes. Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
-            decides to invoke MCP tools. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
+        2.8. citations (list[Chunk])
+        2.9. code_exec_results (list[CodeExecResult])
+        2.10. mcp_calls (list[MCPCall])
     '''
     client_kwargs: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAILMInvoker class.
 
         Args:
@@ -421,6 +413,9 @@ class OpenAILMInvoker(BaseLMInvoker):
                 language model. Defaults to None.
             code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
             web_search (bool, optional): Whether to enable the web search. Defaults to False.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
 
         Raises:
             ValueError:
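Putting the documented parameters together, a minimal construction sketch; the model name is a placeholder, and `reasoning_effort` is assumed to accept a plain string per the `str | None` annotation in the sibling invokers:

```python
from gllm_core.utils.retry import RetryConfig
from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker

invoker = OpenAILMInvoker(
    model_name="o4-mini",      # placeholder reasoning-model name
    reasoning_effort="low",    # reasoning-only parameter; raises on non-reasoning models
    web_search=True,
    code_interpreter=True,
    retry_config=RetryConfig(max_retries=5, timeout=10.0),
    simplify_events=True,      # opt in to the simplified event format
)
```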
gllm_inference/lm_invoker/xai_lm_invoker.pyi CHANGED

@@ -153,18 +153,18 @@ class XAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
-    `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
@@ -194,9 +194,9 @@ class XAILMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -218,13 +218,13 @@ class XAILMInvoker(BaseLMInvoker):
     ```
 
     When web search is enabled, the language model will search for relevant information and may cite the
-    relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the
-    attribute in the output.
+    relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the
+    `citations` attribute in the output.
 
     Output example:
     ```python
     LMOutput(
-        response="According to recent reports, the latest AI developments…
+        response="According to recent reports, the latest AI developments... ([Source](https://example.com)).",
         citations=[
             Chunk(
                 id="search_result_1",
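A short sketch of reading the `citations` documented above from an `LMOutput`; only the attribute names come from this diff, and the invocation call that produces the result is not shown here:

```python
from gllm_inference.schema import LMOutput

def print_citations(result: LMOutput) -> None:
    """Print the response and its cited sources, if the model produced any."""
    print(result.response)
    for chunk in result.citations:  # list[Chunk]; empty when nothing was cited
        print("cited:", chunk.id)
```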
@@ -241,42 +241,23 @@ class XAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the live search activities will be streamed with the `EventType.DATA` event type.
-    This allows you to track the search process in real-time.
-
-    Streaming output example:
-    ```python
-    {"type": "data", "value": '{"data_type": "activity", "data_value": "{\"query\": \"search query\"}", ...}', ...}
-    {"type": "response", "value": "According to recent reports, ", ...}
-    {"type": "response", "value": "the latest AI developments include...", ...}
-    ```
-
     Output types:
     The output of the `XAILMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.…
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_effort` parameter is set.
-            Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
-            to cite the relevant sources. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
+        2.8. citations (list[Chunk])
     '''
     reasoning_effort: Incomplete
     web_search: Incomplete
     client_params: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False, simplify_events: bool = False) -> None:
         """Initializes a new instance of the XAILMInvoker class.
 
         Args:
@@ -298,6 +279,9 @@ class XAILMInvoker(BaseLMInvoker):
             reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models. Not allowed
                 for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
             web_search (bool, optional): Whether to enable the web search. Defaults to False.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
 
         Raises:
             ValueError:
gllm_inference/schema/events.pyi CHANGED

@@ -8,7 +8,7 @@ class ActivityEvent(Event):
     """Event schema for model-triggered activities (e.g. web search, MCP).
 
     Attributes:
-        id (str): The unique identifier for the activity event. Defaults to an…
+        id (str): The unique identifier for the activity event. Defaults to an empty string.
         type (Literal): The type of event, always 'activity'.
         value (Activity): The activity data containing message and type.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -22,7 +22,7 @@ class CodeEvent(Event):
     """Event schema for model-triggered code execution.
 
     Attributes:
-        id (str): The unique identifier for the code event. Defaults to an…
+        id (str): The unique identifier for the code event. Defaults to an empty string.
         type (Literal): The type of event (code, code_start, or code_end).
        value (str): The code content.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -32,32 +32,32 @@ class CodeEvent(Event):
     value: str
     level: EventLevel
     @classmethod
-    def start(cls, …
+    def start(cls, id_: str | None = '') -> CodeEvent:
         """Create a code start event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
 
         Returns:
             CodeEvent: The code start event.
         """
     @classmethod
-    def content(cls, …
+    def content(cls, id_: str | None = '', value: str = '') -> CodeEvent:
         """Create a code content event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
             value (str): The code content.
 
         Returns:
             CodeEvent: The code value event.
         """
     @classmethod
-    def end(cls, …
+    def end(cls, id_: str | None = '') -> CodeEvent:
         """Create a code end event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
 
         Returns:
             CodeEvent: The code end event.
@@ -67,7 +67,7 @@ class ThinkingEvent(Event):
     """Event schema for model thinking.
 
     Attributes:
-        id (str): The unique identifier for the thinking event. Defaults to an…
+        id (str): The unique identifier for the thinking event. Defaults to an empty string.
         type (Literal): The type of thinking event (thinking, thinking_start, or thinking_end).
         value (str): The thinking content or message.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -77,32 +77,32 @@ class ThinkingEvent(Event):
     value: str
     level: EventLevel
     @classmethod
-    def start(cls, …
+    def start(cls, id_: str | None = '') -> ThinkingEvent:
         """Create a thinking start event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
 
         Returns:
             ThinkingEvent: The thinking start event.
         """
     @classmethod
-    def content(cls, …
+    def content(cls, id_: str | None = '', value: str = '') -> ThinkingEvent:
         """Create a thinking value event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
             value (str): The thinking content or message.
 
         Returns:
             ThinkingEvent: The thinking value event.
         """
     @classmethod
-    def end(cls, …
+    def end(cls, id_: str | None = '') -> ThinkingEvent:
         """Create a thinking end event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
 
         Returns:
             ThinkingEvent: The thinking end event.