gllm-inference-binary 0.5.38-cp312-cp312-win_amd64.whl → 0.5.41-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gllm-inference-binary might be problematic.
Files changed (26)
  1. gllm_inference/em_invoker/azure_openai_em_invoker.pyi +2 -2
  2. gllm_inference/em_invoker/bedrock_em_invoker.pyi +2 -2
  3. gllm_inference/em_invoker/google_em_invoker.pyi +2 -2
  4. gllm_inference/em_invoker/openai_em_invoker.pyi +2 -2
  5. gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +2 -2
  6. gllm_inference/em_invoker/voyage_em_invoker.pyi +2 -2
  7. gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +22 -28
  8. gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +24 -29
  9. gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +10 -20
  10. gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +11 -21
  11. gllm_inference/lm_invoker/google_lm_invoker.pyi +46 -28
  12. gllm_inference/lm_invoker/langchain_lm_invoker.pyi +10 -20
  13. gllm_inference/lm_invoker/litellm_lm_invoker.pyi +25 -30
  14. gllm_inference/lm_invoker/lm_invoker.pyi +4 -1
  15. gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +22 -28
  16. gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +4 -1
  17. gllm_inference/lm_invoker/openai_lm_invoker.pyi +45 -50
  18. gllm_inference/lm_invoker/xai_lm_invoker.pyi +26 -42
  19. gllm_inference/schema/events.pyi +15 -15
  20. gllm_inference/schema/lm_output.pyi +4 -0
  21. gllm_inference.cp312-win_amd64.pyd +0 -0
  22. gllm_inference.pyi +1 -1
  23. {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/METADATA +1 -1
  24. {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/RECORD +26 -26
  25. {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/WHEEL +0 -0
  26. {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ from _typeshed import Incomplete
  from gllm_core.event import EventEmitter as EventEmitter
  from gllm_core.schema.tool import Tool as Tool
  from gllm_core.utils.retry import RetryConfig as RetryConfig
- from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker as OpenAICompatibleLMInvoker
+ from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import OpenAIChatCompletionsLMInvoker as OpenAIChatCompletionsLMInvoker
  from gllm_inference.lm_invoker.openai_lm_invoker import ReasoningEffort as ReasoningEffort
  from gllm_inference.schema import AttachmentType as AttachmentType, LMOutput as LMOutput, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema
  from langchain_core.tools import Tool as LangChainTool
@@ -10,7 +10,7 @@ from typing import Any

  SUPPORTED_ATTACHMENTS: Incomplete

- class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
+ class LiteLLMLMInvoker(OpenAIChatCompletionsLMInvoker):
  '''A language model invoker to interact with language models using LiteLLM.

  Attributes:
@@ -156,9 +156,9 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):

  Retry config examples:
  ```python
- retry_config = RetryConfig(max_retries=0, timeout=0.0) # No retry, no timeout
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
  retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
- retry_config = RetryConfig(max_retries=5, timeout=0.0) # 5 max retries, no timeout
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
  retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
  ```
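A hedged sketch of wiring such a config into an invoker, using only the constructor arguments shown in this diff (the LiteLLM-style model id is a placeholder):

```python
from gllm_core.utils.retry import RetryConfig
from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker

# Up to 5 retries with a 10-second timeout per attempt; timeout=None would disable the timeout.
retry_config = RetryConfig(max_retries=5, timeout=10.0)
lm_invoker = LiteLLMLMInvoker(model_id="openai/gpt-4o-mini", retry_config=retry_config)
```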
@@ -192,44 +192,36 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
  )
  ```

- When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
- will be streamed with the `EventType.DATA` event type.
-
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "thinking_start", "data_value": ""}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "Let me think "}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "about it..."}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking_end", "data_value": ""}\', ...}
+ {"type": "thinking_start", "value": "", ...}
+ {"type": "thinking", "value": "Let me think ", ...}
+ {"type": "thinking", "value": "about it...", ...}
+ {"type": "thinking_end", "value": "", ...}
  {"type": "response", "value": "Golden retriever ", ...}
  {"type": "response", "value": "is a good dog breed.", ...}
+ ```
+ Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Setting reasoning-related parameters for non-reasoning models will raise an error.


  Output types:
  The output of the `LiteLLMLMInvoker` can either be:
- 1. `str`: The text response if no additional output is needed.
- 2. `LMOutput`: A Pydantic model with the following attributes if any additional output is needed:
- 2.1. response (str): The text response.
- 2.2. tool_calls (list[ToolCall]): The tool calls, if the `tools` parameter is defined and the language
- model decides to invoke tools. Defaults to an empty list.
- 2.3. structured_output (dict[str, Any] | BaseModel | None): The structured output, if the `response_schema`
- parameter is defined. Defaults to None.
- 2.4. token_usage (TokenUsage | None): The token usage analytics, if the `output_analytics` parameter is
- set to `True`. Defaults to None.
- 2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
- parameter is set to `True`. Defaults to None.
- 2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
- `output_analytics` parameter is set to `True`. Defaults to None.
- 2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
- 2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
- 2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
- Defaults to an empty list.
- 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+ 1. `str`: A text response.
+ 2. `LMOutput`: A Pydantic model that may contain the following attributes:
+ 2.1. response (str)
+ 2.2. tool_calls (list[ToolCall])
+ 2.3. structured_output (dict[str, Any] | BaseModel | None)
+ 2.4. token_usage (TokenUsage | None)
+ 2.5. duration (float | None)
+ 2.6. finish_details (dict[str, Any])
+ 2.7. reasoning (list[Reasoning])
  '''
  completion: Incomplete
- def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+ def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
  """Initializes a new instance of the LiteLLMLMInvoker class.

  Args:
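Callers therefore still branch on the return type; a minimal sketch of consuming either shape, assuming only the `LMOutput` attributes listed above:

```python
from gllm_inference.schema import LMOutput

def extract_text(result: str | LMOutput) -> str:
    # Plain string: the invoker returned just the text response.
    if isinstance(result, str):
        return result
    # LMOutput: the text lives in `response`; extras such as `tool_calls`
    # or `token_usage` are populated only when the matching feature is on.
    return result.response
```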
@@ -246,4 +238,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
  Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
  reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models.
  Defaults to None.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.
  """
@@ -56,7 +56,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
  response_schema: Incomplete
  output_analytics: Incomplete
  retry_config: Incomplete
- def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:
+ def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, simplify_events: bool = False) -> None:
  """Initializes a new instance of the BaseLMInvoker class.

  Args:
@@ -73,6 +73,9 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
  output_analytics (bool, optional): Whether to output the invocation analytics. Defaults to False.
  retry_config (RetryConfig | None, optional): The retry configuration for the language model.
  Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.
  """
  @property
  def model_id(self) -> str:
@@ -171,9 +171,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):

  Retry config examples:
  ```python
- retry_config = RetryConfig(max_retries=0, timeout=0.0) # No retry, no timeout
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
  retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
- retry_config = RetryConfig(max_retries=5, timeout=0.0) # 5 max retries, no timeout
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
  retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
  ```

@@ -207,44 +207,35 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
- will be streamed with the `EventType.DATA` event type.
-
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "thinking_start", "data_value": ""}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "Let me think "}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "about it..."}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking_end", "data_value": ""}\', ...}
+ {"type": "thinking_start", "value": "", ...}
+ {"type": "thinking", "value": "Let me think ", ...}
+ {"type": "thinking", "value": "about it...", ...}
+ {"type": "thinking_end", "value": "", ...}
  {"type": "response", "value": "Golden retriever ", ...}
  {"type": "response", "value": "is a good dog breed.", ...}
  ```
+ Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Setting reasoning-related parameters for non-reasoning models will raise an error.

  Output types:
  The output of the `OpenAIChatCompletionsLMInvoker` can either be:
- 1. `str`: The text response if no additional output is needed.
- 2. `LMOutput`: A Pydantic model with the following attributes if any additional output is needed:
- 2.1. response (str): The text response.
- 2.2. tool_calls (list[ToolCall]): The tool calls, if the `tools` parameter is defined and the language
- model decides to invoke tools. Defaults to an empty list.
- 2.3. structured_output (dict[str, Any] | BaseModel | None): The structured output, if the `response_schema`
- parameter is defined. Defaults to None.
- 2.4. token_usage (TokenUsage | None): The token usage analytics, if the `output_analytics` parameter is
- set to `True`. Defaults to None.
- 2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
- parameter is set to `True`. Defaults to None.
- 2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
- `output_analytics` parameter is set to `True`. Defaults to None.
- 2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
- 2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
- 2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
- Defaults to an empty list.
- 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+ 1. `str`: A text response.
+ 2. `LMOutput`: A Pydantic model that may contain the following attributes:
+ 2.1. response (str)
+ 2.2. tool_calls (list[ToolCall])
+ 2.3. structured_output (dict[str, Any] | BaseModel | None)
+ 2.4. token_usage (TokenUsage | None)
+ 2.5. duration (float | None)
+ 2.6. finish_details (dict[str, Any])
+ 2.7. reasoning (list[Reasoning])
  '''
  client_kwargs: Incomplete
- def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+ def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
  '''Initializes a new instance of the OpenAIChatCompletionsLMInvoker class.

  Args:
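The legacy and simplified event shapes above differ only in where the payload lives, so consumers can normalize them during the migration window; a hedged sketch, assuming events arrive as plain dicts exactly as documented:

```python
import json

def simplify_legacy_event(event: dict) -> dict:
    """Normalize a legacy EventType.DATA event into the simplified shape.

    Legacy:     {"type": "data", "value": '{"data_type": "thinking", "data_value": "..."}'}
    Simplified: {"type": "thinking", "value": "..."}
    Already-simplified events pass through unchanged.
    """
    if event.get("type") != "data":
        return event
    payload = json.loads(event["value"])
    return {**event, "type": payload["data_type"], "value": payload["data_value"]}
```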
@@ -266,6 +257,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
  retry_config (RetryConfig | None, optional): The retry configuration for the language model.
  Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
  reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.
  '''
  def set_response_schema(self, response_schema: ResponseSchema | None) -> None:
  """Sets the response schema for the OpenAI language model.
@@ -25,7 +25,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):

  This class is deprecated and will be removed in v0.6. Please use the `OpenAIChatCompletionsLMInvoker` class instead.
  """
- def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+ def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
  '''Initializes a new instance of the OpenAICompatibleLMInvoker class.

  Args:
@@ -46,4 +46,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):
  retry_config (RetryConfig | None, optional): The retry configuration for the language model.
  Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
  reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.
  '''
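Since `OpenAICompatibleLMInvoker` is now a deprecated wrapper, migrating is a rename plus keyword arguments; a sketch with placeholder endpoint and credentials (note that `base_url` moved from the second positional slot to a keyword with a default):

```python
from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import (
    OpenAIChatCompletionsLMInvoker,
)

# Before (deprecated, removed in v0.6):
#   OpenAICompatibleLMInvoker("my-model", "http://localhost:8000/v1", api_key="sk-...")
lm_invoker = OpenAIChatCompletionsLMInvoker(
    model_name="my-model",                # placeholder model name
    base_url="http://localhost:8000/v1",  # placeholder OpenAI-compatible endpoint
    api_key="sk-...",                     # placeholder credential
)
```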
@@ -11,7 +11,8 @@ from langchain_core.tools import Tool as LangChainTool
  from typing import Any

  SUPPORTED_ATTACHMENTS: Incomplete
- STREAM_DATA_TRANSITION_TYPE_MAP: Incomplete
+ STREAM_DATA_START_TYPE_MAP: Incomplete
+ STREAM_DATA_END_TYPE_MAP: Incomplete
  STREAM_DATA_CONTENT_TYPE_MAP: Incomplete

  class OpenAILMInvoker(BaseLMInvoker):
@@ -176,9 +177,9 @@ class OpenAILMInvoker(BaseLMInvoker):

  Retry config examples:
  ```python
- retry_config = RetryConfig(max_retries=0, timeout=0.0) # No retry, no timeout
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
  retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
- retry_config = RetryConfig(max_retries=5, timeout=0.0) # 5 max retries, no timeout
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
  retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
  ```

@@ -216,18 +217,18 @@ class OpenAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
- `EventType.DATA` event type.
-
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "thinking_start", "data_value": ""}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "Let me think "}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "about it..."}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking_end", "data_value": ""}\', ...}
+ {"type": "thinking_start", "value": "", ...}
+ {"type": "thinking", "value": "Let me think ", ...}
+ {"type": "thinking", "value": "about it...", ...}
+ {"type": "thinking_end", "value": "", ...}
  {"type": "response", "value": "Golden retriever ", ...}
  {"type": "response", "value": "is a good dog breed.", ...}
  ```
+ Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Setting reasoning-related parameters for non-reasoning models will raise an error.

@@ -262,14 +263,16 @@ class OpenAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_list_tools\\"}", ...}\', ...}
- {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_call\\"}", ...}\', ...}
+ {"type": "activity", "value": {"type": "mcp_list_tools", ...}, ...}
+ {"type": "activity", "value": {"type": "mcp_call", ...}, ...}
  {"type": "response", "value": "The result ", ...}
  {"type": "response", "value": "is 10.", ...}
  ```
+ Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Code interpreter:
  The code interpreter is a feature that allows the language model to write and run Python code in a
@@ -287,14 +290,8 @@ class OpenAILMInvoker(BaseLMInvoker):
  Messages example:
  ```python
  messages = [
- Message(
- role=MessageRole.SYSTEM,
- contents=["You are a data analyst. Use the python tool to generate a file."],
- ),
- Message(
- role=MessageRole.USER,
- contents=["Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"],
- ),
+ Message.system("You are a data analyst. Use the python tool to generate a file."),
+ Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
  ]
  ```

@@ -315,16 +312,18 @@ class OpenAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled, the executed code will be streamed with the `EventType.DATA` event type.
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "code_start", "data_value": ""}\', ...}
- {"type": "data", "value": \'{"data_type": "code", "data_value": "import matplotlib"}\', ...}
- {"type": "data", "value": \'{"data_type": "code", "data_value": ".pyplot as plt..."}\', ...}
- {"type": "data", "value": \'{"data_type": "code_end", "data_value": ""}\', ...}
+ {"type": "code_start", "value": "", ...}
+ {"type": "code", "value": "import matplotlib", ...}
+ {"type": "code", "value": ".pyplot as plt...", ...}
+ {"type": "code_end", "value": "", ...}
  {"type": "response", "value": "The histogram ", ...}
  {"type": "response", "value": "is attached.", ...}
  ```
+ Note: By default, the code token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Web search:
  The web search is a feature that allows the language model to search the web for relevant information.
@@ -359,40 +358,33 @@ class OpenAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled, the web search activities will be streamed with the `EventType.DATA` event type.
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"query\\": \\"search query\\"}", ...}\', ...}
+ {"type": "activity", "value": {"query": "search query"}, ...}
  {"type": "response", "value": "The winner of the match ", ...}
  {"type": "response", "value": "is team A ([Example title](https://www.example.com)).", ...}
  ```
+ Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Output types:
  The output of the `OpenAILMInvoker` can either be:
- 1. `str`: The text response if no additional output is needed.
- 2. `LMOutput`: A Pydantic model with the following attributes if any additional output is needed:
- 2.1. response (str): The text response.
- 2.2. tool_calls (list[ToolCall]): The tool calls, if the `tools` parameter is defined and the language
- model decides to invoke tools. Defaults to an empty list.
- 2.3. structured_output (dict[str, Any] | BaseModel | None): The structured output, if the `response_schema`
- parameter is defined. Defaults to None.
- 2.4. token_usage (TokenUsage | None): The token usage analytics, if the `output_analytics` parameter is
- set to `True`. Defaults to None.
- 2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
- parameter is set to `True`. Defaults to None.
- 2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
- `output_analytics` parameter is set to `True`. Defaults to None.
- 2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_summary` parameter is provided
- for reasoning models. Defaults to an empty list.
- 2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
- to cite the relevant sources. Defaults to an empty list.
- 2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
- enabled and the language model decides to execute any codes. Defaults to an empty list.
- 2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
- decides to invoke MCP tools. Defaults to an empty list.
+ 1. `str`: A text response.
+ 2. `LMOutput`: A Pydantic model that may contain the following attributes:
+ 2.1. response (str)
+ 2.2. tool_calls (list[ToolCall])
+ 2.3. structured_output (dict[str, Any] | BaseModel | None)
+ 2.4. token_usage (TokenUsage | None)
+ 2.5. duration (float | None)
+ 2.6. finish_details (dict[str, Any])
+ 2.7. reasoning (list[Reasoning])
+ 2.8. citations (list[Chunk])
+ 2.9. code_exec_results (list[CodeExecResult])
+ 2.10. mcp_calls (list[MCPCall])
  '''
  client_kwargs: Incomplete
- def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
+ def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False, simplify_events: bool = False) -> None:
  '''Initializes a new instance of the OpenAILMInvoker class.

  Args:
@@ -421,6 +413,9 @@ class OpenAILMInvoker(BaseLMInvoker):
  language model. Defaults to None.
  code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
  web_search (bool, optional): Whether to enable the web search. Defaults to False.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.

  Raises:
  ValueError:
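A minimal sketch of the expanded `OpenAILMInvoker` signature from this release; the model name is a placeholder and reading the key from the environment is an assumption:

```python
import os

from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker

lm_invoker = OpenAILMInvoker(
    model_name="gpt-4o",                       # placeholder model name
    api_key=os.environ.get("OPENAI_API_KEY"),  # assumed credential source
    web_search=True,                           # enable the built-in web search tool
    code_interpreter=True,                     # enable the sandboxed Python tool
    simplify_events=True,                      # opt in to the simplified event format
)
```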
@@ -153,18 +153,18 @@ class XAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
- `EventType.DATA` event type.
-
  Streaming output example:
  ```python
- {"type": "data", "value": \'{"data_type": "thinking_start", "data_value": ""}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "Let me think "}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking", "data_value": "about it..."}\', ...}
- {"type": "data", "value": \'{"data_type": "thinking_end", "data_value": ""}\', ...}
+ {"type": "thinking_start", "value": "", ...}
+ {"type": "thinking", "value": "Let me think ", ...}
+ {"type": "thinking", "value": "about it...", ...}
+ {"type": "thinking_end", "value": "", ...}
  {"type": "response", "value": "Golden retriever ", ...}
  {"type": "response", "value": "is a good dog breed.", ...}
  ```
+ Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+ To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+ LM invoker initialization. The legacy event format support will be removed in v0.6.

  Setting reasoning-related parameters for non-reasoning models will raise an error.

@@ -194,9 +194,9 @@ class XAILMInvoker(BaseLMInvoker):

  Retry config examples:
  ```python
- retry_config = RetryConfig(max_retries=0, timeout=0.0) # No retry, no timeout
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
  retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
- retry_config = RetryConfig(max_retries=5, timeout=0.0) # 5 max retries, no timeout
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
  retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
  ```

@@ -218,13 +218,13 @@ class XAILMInvoker(BaseLMInvoker):
  ```

  When web search is enabled, the language model will search for relevant information and may cite the
- relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the `citations`
- attribute in the output.
+ relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the
+ `citations` attribute in the output.

  Output example:
  ```python
  LMOutput(
- response="According to recent reports, the latest AI developments include... ([Source](https://example.com)).",
+ response="According to recent reports, the latest AI developments... ([Source](https://example.com)).",
  citations=[
  Chunk(
  id="search_result_1",
@@ -241,42 +241,23 @@ class XAILMInvoker(BaseLMInvoker):
  )
  ```

- When streaming is enabled, the live search activities will be streamed with the `EventType.DATA` event type.
- This allows you to track the search process in real-time.
-
- Streaming output example:
- ```python
- {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"query\\": \\"search query\\"}", ...}\', ...}
- {"type": "response", "value": "According to recent reports, ", ...}
- {"type": "response", "value": "the latest AI developments include...", ...}
- ```
-
  Output types:
  The output of the `XAILMInvoker` can either be:
- 1. `str`: The text response if no additional output is needed.
- 2. `LMOutput`: A Pydantic model with the following attributes if any additional output is needed:
- 2.1. response (str): The text response.
- 2.2. tool_calls (list[ToolCall]): The tool calls, if the `tools` parameter is defined and the language
- model decides to invoke tools. Defaults to an empty list.
- 2.3. structured_output (dict[str, Any] | BaseModel | None): The structured output, if the `response_schema`
- parameter is defined. Defaults to None.
- 2.4. token_usage (TokenUsage | None): The token usage analytics, if the `output_analytics` parameter is
- set to `True`. Defaults to None.
- 2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
- parameter is set to `True`. Defaults to None.
- 2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
- `output_analytics` parameter is set to `True`. Defaults to None.
- 2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_effort` parameter is set.
- Defaults to an empty list.
- 2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
- to cite the relevant sources. Defaults to an empty list.
- 2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
- Defaults to an empty list.
+ 1. `str`: A text response.
+ 2. `LMOutput`: A Pydantic model that may contain the following attributes:
+ 2.1. response (str)
+ 2.2. tool_calls (list[ToolCall])
+ 2.3. structured_output (dict[str, Any] | BaseModel | None)
+ 2.4. token_usage (TokenUsage | None)
+ 2.5. duration (float | None)
+ 2.6. finish_details (dict[str, Any])
+ 2.7. reasoning (list[Reasoning])
+ 2.8. citations (list[Chunk])
  '''
  reasoning_effort: Incomplete
  web_search: Incomplete
  client_params: Incomplete
- def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False) -> None:
+ def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False, simplify_events: bool = False) -> None:
  """Initializes a new instance of the XAILMInvoker class.

  Args:
@@ -298,6 +279,9 @@ class XAILMInvoker(BaseLMInvoker):
  reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models. Not allowed
  for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
  web_search (bool, optional): Whether to enable the web search. Defaults to False.
+ simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+ When True, uses the simplified events format. When False, uses the legacy events format for
+ backward compatibility. Will be removed in v0.6. Defaults to False.

  Raises:
  ValueError:
@@ -8,7 +8,7 @@ class ActivityEvent(Event):
  """Event schema for model-triggered activities (e.g. web search, MCP).

  Attributes:
- id (str): The unique identifier for the activity event. Defaults to an UUID string.
+ id (str): The unique identifier for the activity event. Defaults to an empty string.
  type (Literal): The type of event, always 'activity'.
  value (Activity): The activity data containing message and type.
  level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -22,7 +22,7 @@ class CodeEvent(Event):
  """Event schema for model-triggered code execution.

  Attributes:
- id (str): The unique identifier for the code event. Defaults to an UUID string.
+ id (str): The unique identifier for the code event. Defaults to an empty string.
  type (Literal): The type of event (code, code_start, or code_end).
  value (str): The code content.
  level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -32,32 +32,32 @@ class CodeEvent(Event):
  value: str
  level: EventLevel
  @classmethod
- def start(cls, id: str | None = None) -> CodeEvent:
+ def start(cls, id_: str | None = '') -> CodeEvent:
  """Create a code start event.

  Args:
- id (str | None): The unique identifier for the code event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the code event. Defaults to an empty string.

  Returns:
  CodeEvent: The code start event.
  """
  @classmethod
- def content(cls, id: str | None = None, value: str = '') -> CodeEvent:
+ def content(cls, id_: str | None = '', value: str = '') -> CodeEvent:
  """Create a code content event.

  Args:
- id (str | None): The unique identifier for the code event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
  value (str): The code content.

  Returns:
  CodeEvent: The code value event.
  """
  @classmethod
- def end(cls, id: str | None = None) -> CodeEvent:
+ def end(cls, id_: str | None = '') -> CodeEvent:
  """Create a code end event.

  Args:
- id (str | None): The unique identifier for the code event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the code event. Defaults to an empty string.

  Returns:
  CodeEvent: The code end event.
@@ -67,7 +67,7 @@ class ThinkingEvent(Event):
  """Event schema for model thinking.

  Attributes:
- id (str): The unique identifier for the thinking event. Defaults to an UUID string.
+ id (str): The unique identifier for the thinking event. Defaults to an empty string.
  type (Literal): The type of thinking event (thinking, thinking_start, or thinking_end).
  value (str): The thinking content or message.
  level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -77,32 +77,32 @@ class ThinkingEvent(Event):
  value: str
  level: EventLevel
  @classmethod
- def start(cls, id: str | None = None) -> ThinkingEvent:
+ def start(cls, id_: str | None = '') -> ThinkingEvent:
  """Create a thinking start event.

  Args:
- id (str | None): The unique identifier for the thinking event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.

  Returns:
  ThinkingEvent: The thinking start event.
  """
  @classmethod
- def content(cls, id: str | None = None, value: str = '') -> ThinkingEvent:
+ def content(cls, id_: str | None = '', value: str = '') -> ThinkingEvent:
  """Create a thinking value event.

  Args:
- id (str | None): The unique identifier for the thinking event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
  value (str): The thinking content or message.

  Returns:
  ThinkingEvent: The thinking value event.
  """
  @classmethod
- def end(cls, id: str | None = None) -> ThinkingEvent:
+ def end(cls, id_: str | None = '') -> ThinkingEvent:
  """Create a thinking end event.

  Args:
- id (str | None): The unique identifier for the thinking event. Defaults to an UUID string.
+ id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.

  Returns:
  ThinkingEvent: The thinking end event.
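A hedged sketch of the new classmethod-based event construction from `gllm_inference/schema/events.pyi`; the id value is arbitrary, and `CodeEvent` follows the same pattern:

```python
from gllm_inference.schema.events import ThinkingEvent

# Build the simplified thinking sequence shown in the streaming examples above.
events = [
    ThinkingEvent.start(id_="step-1"),
    ThinkingEvent.content(id_="step-1", value="Let me think about it..."),
    ThinkingEvent.end(id_="step-1"),
]
for event in events:
    print(event.type, repr(event.value))  # thinking_start '', thinking '...', thinking_end ''
```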