gllm-inference-binary 0.5.38-cp312-cp312-win_amd64.whl → 0.5.41-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gllm-inference-binary might be problematic.
- gllm_inference/em_invoker/azure_openai_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/bedrock_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/google_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/openai_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +2 -2
- gllm_inference/em_invoker/voyage_em_invoker.pyi +2 -2
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +22 -28
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +24 -29
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +10 -20
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +11 -21
- gllm_inference/lm_invoker/google_lm_invoker.pyi +46 -28
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +10 -20
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi +25 -30
- gllm_inference/lm_invoker/lm_invoker.pyi +4 -1
- gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +22 -28
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +4 -1
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +45 -50
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +26 -42
- gllm_inference/schema/events.pyi +15 -15
- gllm_inference/schema/lm_output.pyi +4 -0
- gllm_inference.cp312-win_amd64.pyd +0 -0
- gllm_inference.pyi +1 -1
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/METADATA +1 -1
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/RECORD +26 -26
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/WHEEL +0 -0
- {gllm_inference_binary-0.5.38.dist-info → gllm_inference_binary-0.5.41.dist-info}/top_level.txt +0 -0
gllm_inference/lm_invoker/litellm_lm_invoker.pyi CHANGED

@@ -2,7 +2,7 @@ from _typeshed import Incomplete
 from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
-from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker as OpenAICompatibleLMInvoker
+from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import OpenAIChatCompletionsLMInvoker as OpenAIChatCompletionsLMInvoker
 from gllm_inference.lm_invoker.openai_lm_invoker import ReasoningEffort as ReasoningEffort
 from gllm_inference.schema import AttachmentType as AttachmentType, LMOutput as LMOutput, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema
 from langchain_core.tools import Tool as LangChainTool
@@ -10,7 +10,7 @@ from typing import Any
 
 SUPPORTED_ATTACHMENTS: Incomplete
 
-class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
+class LiteLLMLMInvoker(OpenAIChatCompletionsLMInvoker):
     '''A language model invoker to interact with language models using LiteLLM.
 
     Attributes:
@@ -156,9 +156,9 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
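For orientation, a minimal sketch (not part of the diff) of passing a `RetryConfig` from the examples above into the invoker; the import paths follow the file list at the top, and the model id is a placeholder:

```python
from gllm_core.utils.retry import RetryConfig
from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker

# 5 retries with a 10.0-second timeout, matching the docstring examples above.
retry_config = RetryConfig(max_retries=5, timeout=10.0)

# "openai/gpt-4o" is a placeholder LiteLLM-style model id, not taken from this diff.
invoker = LiteLLMLMInvoker(model_id="openai/gpt-4o", retry_config=retry_config)
```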
@@ -192,44 +192,36 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
-    will be streamed with the `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
+    ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
 
     Output types:
     The output of the `LiteLLMLMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
     '''
     completion: Incomplete
-    def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         """Initializes a new instance of the LiteLLMLMInvoker class.
 
         Args:
@@ -246,4 +238,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models.
                 Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         """
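The new `simplify_events` flag threads through every invoker below. A hedged sketch of opting in and dispatching the simplified event shapes documented above; how the events are delivered (e.g. via `EventEmitter`) is outside what this diff shows:

```python
from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker

# Opt in to the simplified streamed event format; the legacy EventType.DATA
# envelope remains the default until v0.6. Model id is a placeholder.
invoker = LiteLLMLMInvoker(model_id="openai/gpt-4o", simplify_events=True)

def handle_event(event: dict) -> None:
    """Dispatch on the simplified event shapes shown in the docstring above."""
    if event["type"] in ("thinking_start", "thinking", "thinking_end"):
        print(f"[{event['type']}] {event['value']}")
    elif event["type"] == "response":
        print(event["value"], end="")
```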
gllm_inference/lm_invoker/lm_invoker.pyi CHANGED

@@ -56,7 +56,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
     response_schema: Incomplete
     output_analytics: Incomplete
     retry_config: Incomplete
-    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:
+    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, simplify_events: bool = False) -> None:
         """Initializes a new instance of the BaseLMInvoker class.
 
         Args:
@@ -73,6 +73,9 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
             output_analytics (bool, optional): Whether to output the invocation analytics. Defaults to False.
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         """
     @property
     def model_id(self) -> str:
gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi CHANGED

@@ -171,9 +171,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -207,44 +207,35 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning and the provider supports reasoning output, the reasoning token
-    will be streamed with the `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
     Output types:
     The output of the `OpenAIChatCompletionsLMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.5. duration (float | None): The duration of the invocation in seconds, if the `output_analytics`
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects. Currently not supported. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
     '''
     client_kwargs: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAIChatCompletionsLMInvoker class.
 
         Args:
@@ -266,6 +257,9 @@ class OpenAIChatCompletionsLMInvoker(BaseLMInvoker):
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         '''
     def set_response_schema(self, response_schema: ResponseSchema | None) -> None:
         """Sets the response schema for the OpenAI language model.
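`set_response_schema` above allows swapping the schema after construction. A minimal sketch, assuming a Pydantic model is an accepted `ResponseSchema` (consistent with `structured_output` being typed `dict[str, Any] | BaseModel | None` above); the model name is a placeholder:

```python
from pydantic import BaseModel
from gllm_inference.lm_invoker.openai_chat_completions_lm_invoker import OpenAIChatCompletionsLMInvoker

class Answer(BaseModel):  # assumed to qualify as a ResponseSchema
    answer: str
    confidence: float

invoker = OpenAIChatCompletionsLMInvoker(model_name="gpt-4o")
invoker.set_response_schema(Answer)  # subsequent LMOutput.structured_output may be an Answer
invoker.set_response_schema(None)    # clears the schema again
```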
gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi CHANGED

@@ -25,7 +25,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):
 
     This class is deprecated and will be removed in v0.6. Please use the `OpenAIChatCompletionsLMInvoker` class instead.
     """
-    def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
+    def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAICompatibleLMInvoker class.
 
         Args:
@@ -46,4 +46,7 @@ class OpenAICompatibleLMInvoker(OpenAIChatCompletionsLMInvoker):
             retry_config (RetryConfig | None, optional): The retry configuration for the language model.
                 Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
             reasoning_effort (str | None, optional): The reasoning effort for the language model. Defaults to None.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
         '''
gllm_inference/lm_invoker/openai_lm_invoker.pyi CHANGED

@@ -11,7 +11,8 @@ from langchain_core.tools import Tool as LangChainTool
 from typing import Any
 
 SUPPORTED_ATTACHMENTS: Incomplete
-…
+STREAM_DATA_START_TYPE_MAP: Incomplete
+STREAM_DATA_END_TYPE_MAP: Incomplete
 STREAM_DATA_CONTENT_TYPE_MAP: Incomplete
 
 class OpenAILMInvoker(BaseLMInvoker):
@@ -176,9 +177,9 @@ class OpenAILMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -216,18 +217,18 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
-    `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
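For contrast with the removed `EventType.DATA` wording, a hedged sketch of the two event shapes as dict literals; the legacy field names are inferred from the removed `XAILMInvoker` streaming example further down and may not match every payload:

```python
import json

# Legacy envelope (default until v0.6): the typed payload is JSON-serialized
# inside a generic "data" event.
legacy_event = {
    "type": "data",
    "value": json.dumps({"data_type": "thinking", "data_value": "Let me think "}),
}

# Simplified format (simplify_events=True): the event type is carried directly.
simple_event = {"type": "thinking", "value": "Let me think "}
```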
@@ -262,14 +263,16 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
+    {"type": "activity", "value": {"type": "mcp_list_tools", ...}, ...}
+    {"type": "activity", "value": {"type": "mcp_call", ...}, ...}
     {"type": "response", "value": "The result ", ...}
     {"type": "response", "value": "is 10.", ...}
     ```
+    Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Code interpreter:
     The code interpreter is a feature that allows the language model to write and run Python code in a
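A small sketch of consuming the simplified MCP activity events shown above; only the dict shapes come from this diff:

```python
def handle_mcp_event(event: dict) -> None:
    """Log MCP activity events and print response tokens."""
    if event["type"] == "activity":
        activity = event["value"]  # e.g. {"type": "mcp_list_tools", ...}
        print("MCP activity:", activity.get("type"))
    elif event["type"] == "response":
        print(event["value"], end="")
```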
@@ -287,14 +290,8 @@ class OpenAILMInvoker(BaseLMInvoker):
     Messages example:
     ```python
     messages = [
-        Message(
-            role=MessageRole.SYSTEM,
-            contents=["You are a data analyst. Use the python tool to generate a file."],
-        ),
-        Message(
-            role=MessageRole.USER,
-            contents=["Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"],
-        ),
+        Message.system("You are a data analyst. Use the python tool to generate a file."),
+        Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
     ]
     ```
 
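The rewritten example above replaces the verbose `Message(role=..., contents=[...])` construction with the `Message.system` / `Message.user` helpers. A sketch of pairing them with the code interpreter; the `Message` import location is assumed and the model name is a placeholder:

```python
from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker
from gllm_inference.schema import Message  # import location assumed

invoker = OpenAILMInvoker(model_name="gpt-4o", code_interpreter=True)
messages = [
    Message.system("You are a data analyst. Use the python tool to generate a file."),
    Message.user("Show an histogram of the following data: [1, 2, 1, 4, 1, 2, 4, 2, 3, 1]"),
]
```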
@@ -315,16 +312,18 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the executed code will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "code_start", "value": "", ...}
+    {"type": "code", "value": "import matplotlib", ...}
+    {"type": "code", "value": ".pyplot as plt...", ...}
+    {"type": "code_end", "value": "", ...}
     {"type": "response", "value": "The histogram ", ...}
     {"type": "response", "value": "is attached.", ...}
     ```
+    Note: By default, the code token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Web search:
     The web search is a feature that allows the language model to search the web for relevant information.
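A hedged sketch of reassembling the streamed interpreter code from the `code_start` / `code` / `code_end` shapes above:

```python
code_buffer: list[str] = []

def handle_code_event(event: dict) -> None:
    """Accumulate streamed code between code_start and code_end."""
    if event["type"] == "code_start":
        code_buffer.clear()
    elif event["type"] == "code":
        code_buffer.append(event["value"])
    elif event["type"] == "code_end":
        print("executed code:\n" + "".join(code_buffer))
    elif event["type"] == "response":
        print(event["value"], end="")
```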
@@ -359,40 +358,33 @@ class OpenAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the web search activities will be streamed with the `EventType.DATA` event type.
     Streaming output example:
     ```python
-    {"type": "…
+    {"type": "activity", "value": {"query": "search query"}, ...}
     {"type": "response", "value": "The winner of the match ", ...}
     {"type": "response", "value": "is team A ([Example title](https://www.example.com)).", ...}
     ```
+    Note: By default, the activity token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Output types:
     The output of the `OpenAILMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.…
-        2.…
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_summary` parameter is provided
-            for reasoning models. Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
-            to cite the relevant sources. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
-            enabled and the language model decides to execute any codes. Defaults to an empty list.
-        2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
-            decides to invoke MCP tools. Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
+        2.8. citations (list[Chunk])
+        2.9. code_exec_results (list[CodeExecResult])
+        2.10. mcp_calls (list[MCPCall])
     '''
     client_kwargs: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False, simplify_events: bool = False) -> None:
         '''Initializes a new instance of the OpenAILMInvoker class.
 
         Args:
@@ -421,6 +413,9 @@ class OpenAILMInvoker(BaseLMInvoker):
                 language model. Defaults to None.
             code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
             web_search (bool, optional): Whether to enable the web search. Defaults to False.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
 
         Raises:
             ValueError:
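Putting the documented parameters together, a minimal construction sketch; the model name is a placeholder, and `reasoning_effort` is assumed to accept a plain string per the `str | None` annotation in the sibling invokers:

```python
from gllm_core.utils.retry import RetryConfig
from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker

invoker = OpenAILMInvoker(
    model_name="o4-mini",      # placeholder reasoning-model name
    reasoning_effort="low",    # reasoning-only parameter; raises on non-reasoning models
    web_search=True,
    code_interpreter=True,
    retry_config=RetryConfig(max_retries=5, timeout=10.0),
    simplify_events=True,      # opt in to the simplified event format
)
```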
gllm_inference/lm_invoker/xai_lm_invoker.pyi CHANGED

@@ -153,18 +153,18 @@ class XAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled along with reasoning summary, the reasoning summary token will be streamed with the
-    `EventType.DATA` event type.
-
     Streaming output example:
     ```python
-    {"type": "…
-    {"type": "…
-    {"type": "…
-    {"type": "…
+    {"type": "thinking_start", "value": "", ...}
+    {"type": "thinking", "value": "Let me think ", ...}
+    {"type": "thinking", "value": "about it...", ...}
+    {"type": "thinking_end", "value": "", ...}
     {"type": "response", "value": "Golden retriever ", ...}
     {"type": "response", "value": "is a good dog breed.", ...}
     ```
+    Note: By default, the thinking token will be streamed with the legacy `EventType.DATA` event type.
+    To use the new simplified streamed event format, set the `simplify_events` parameter to `True` during
+    LM invoker initialization. The legacy event format support will be removed in v0.6.
 
     Setting reasoning-related parameters for non-reasoning models will raise an error.
 
@@ -194,9 +194,9 @@ class XAILMInvoker(BaseLMInvoker):
 
     Retry config examples:
     ```python
-    retry_config = RetryConfig(max_retries=0, timeout=…
+    retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
     retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
-    retry_config = RetryConfig(max_retries=5, timeout=…
+    retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
     retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
     ```
 
@@ -218,13 +218,13 @@ class XAILMInvoker(BaseLMInvoker):
     ```
 
     When web search is enabled, the language model will search for relevant information and may cite the
-    relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the
-    attribute in the output.
+    relevant sources (including from X platform). The citations will be stored as `Chunk` objects in the
+    `citations` attribute in the output.
 
     Output example:
     ```python
     LMOutput(
-        response="According to recent reports, the latest AI developments…
+        response="According to recent reports, the latest AI developments... ([Source](https://example.com)).",
         citations=[
             Chunk(
                 id="search_result_1",
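A short sketch of reading the `citations` documented above from an `LMOutput`; only the attribute names come from this diff, and the invocation call that produces the result is not shown here:

```python
from gllm_inference.schema import LMOutput

def print_citations(result: LMOutput) -> None:
    """Print the response and its cited sources, if the model produced any."""
    print(result.response)
    for chunk in result.citations:  # list[Chunk]; empty when nothing was cited
        print("cited:", chunk.id)
```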
@@ -241,42 +241,23 @@ class XAILMInvoker(BaseLMInvoker):
     )
     ```
 
-    When streaming is enabled, the live search activities will be streamed with the `EventType.DATA` event type.
-    This allows you to track the search process in real-time.
-
-    Streaming output example:
-    ```python
-    {"type": "data", "value": '{"data_type": "activity", "data_value": "{\"query\": \"search query\"}", ...}', ...}
-    {"type": "response", "value": "According to recent reports, ", ...}
-    {"type": "response", "value": "the latest AI developments include...", ...}
-    ```
-
     Output types:
     The output of the `XAILMInvoker` can either be:
-    1. `str`: …
-    2. `LMOutput`: A Pydantic model …
-        2.1. response (str) …
-        2.2. tool_calls (list[ToolCall]) …
-        2.…
-        2.…
-        2.…
-            parameter is set to `True`. Defaults to None.
-        2.6. finish_details (dict[str, Any] | None): The details about how the generation finished, if the
-            `output_analytics` parameter is set to `True`. Defaults to None.
-        2.7. reasoning (list[Reasoning]): The reasoning objects, if the `reasoning_effort` parameter is set.
-            Defaults to an empty list.
-        2.8. citations (list[Chunk]): The citations, if the web_search is enabled and the language model decides
-            to cite the relevant sources. Defaults to an empty list.
-        2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
-            Defaults to an empty list.
+    1. `str`: A text response.
+    2. `LMOutput`: A Pydantic model that may contain the following attributes:
+        2.1. response (str)
+        2.2. tool_calls (list[ToolCall])
+        2.3. structured_output (dict[str, Any] | BaseModel | None)
+        2.4. token_usage (TokenUsage | None)
+        2.5. duration (float | None)
+        2.6. finish_details (dict[str, Any])
+        2.7. reasoning (list[Reasoning])
+        2.8. citations (list[Chunk])
     '''
     reasoning_effort: Incomplete
     web_search: Incomplete
     client_params: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, web_search: bool = False, simplify_events: bool = False) -> None:
         """Initializes a new instance of the XAILMInvoker class.
 
         Args:
@@ -298,6 +279,9 @@ class XAILMInvoker(BaseLMInvoker):
             reasoning_effort (ReasoningEffort | None, optional): The reasoning effort for reasoning models. Not allowed
                 for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
             web_search (bool, optional): Whether to enable the web search. Defaults to False.
+            simplify_events (bool, optional): Temporary parameter to control the streamed events format.
+                When True, uses the simplified events format. When False, uses the legacy events format for
+                backward compatibility. Will be removed in v0.6. Defaults to False.
 
         Raises:
             ValueError:
gllm_inference/schema/events.pyi CHANGED

@@ -8,7 +8,7 @@ class ActivityEvent(Event):
     """Event schema for model-triggered activities (e.g. web search, MCP).
 
     Attributes:
-        id (str): The unique identifier for the activity event. Defaults to an…
+        id (str): The unique identifier for the activity event. Defaults to an empty string.
         type (Literal): The type of event, always 'activity'.
         value (Activity): The activity data containing message and type.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -22,7 +22,7 @@ class CodeEvent(Event):
     """Event schema for model-triggered code execution.
 
     Attributes:
-        id (str): The unique identifier for the code event. Defaults to an…
+        id (str): The unique identifier for the code event. Defaults to an empty string.
         type (Literal): The type of event (code, code_start, or code_end).
        value (str): The code content.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -32,32 +32,32 @@ class CodeEvent(Event):
     value: str
     level: EventLevel
     @classmethod
-    def start(cls, …
+    def start(cls, id_: str | None = '') -> CodeEvent:
         """Create a code start event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
 
         Returns:
             CodeEvent: The code start event.
         """
     @classmethod
-    def content(cls, …
+    def content(cls, id_: str | None = '', value: str = '') -> CodeEvent:
         """Create a code content event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
             value (str): The code content.
 
         Returns:
             CodeEvent: The code value event.
         """
     @classmethod
-    def end(cls, …
+    def end(cls, id_: str | None = '') -> CodeEvent:
         """Create a code end event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the code event. Defaults to an empty string.
 
         Returns:
             CodeEvent: The code end event.
@@ -67,7 +67,7 @@ class ThinkingEvent(Event):
     """Event schema for model thinking.
 
     Attributes:
-        id (str): The unique identifier for the thinking event. Defaults to an…
+        id (str): The unique identifier for the thinking event. Defaults to an empty string.
         type (Literal): The type of thinking event (thinking, thinking_start, or thinking_end).
         value (str): The thinking content or message.
         level (EventLevel): The severity level of the event. Defined through the EventLevel constants.
@@ -77,32 +77,32 @@ class ThinkingEvent(Event):
     value: str
     level: EventLevel
     @classmethod
-    def start(cls, …
+    def start(cls, id_: str | None = '') -> ThinkingEvent:
         """Create a thinking start event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
 
         Returns:
             ThinkingEvent: The thinking start event.
         """
     @classmethod
-    def content(cls, …
+    def content(cls, id_: str | None = '', value: str = '') -> ThinkingEvent:
         """Create a thinking value event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
             value (str): The thinking content or message.
 
         Returns:
             ThinkingEvent: The thinking value event.
         """
     @classmethod
-    def end(cls, …
+    def end(cls, id_: str | None = '') -> ThinkingEvent:
         """Create a thinking end event.
 
         Args:
-            …
+            id_ (str | None): The unique identifier for the thinking event. Defaults to an empty string.
 
         Returns:
             ThinkingEvent: The thinking end event.