PyPI - gllm-inference-binary - Versions diffs - 0.5.29__cp311-cp311-macosx_13_0_x86_64.whl → 0.5.31__cp311-cp311-macosx_13_0_x86_64.whl - Mend

gllm-inference-binary 0.5.29__cp311-cp311-macosx_13_0_x86_64.whl → 0.5.31__cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gllm-inference-binary might be problematic. Click here for more details.

Files changed (22) hide show

gllm_inference/lm_invoker/anthropic_lm_invoker.pyi CHANGED Viewed

@@ -5,7 +5,7 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
 from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.anthropic import InputType as InputType, Key as Key, OutputType as OutputType
-from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMInput as LMInput, LMOutput as LMOutput, Message as Message, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
 from langchain_core.tools import Tool as LangChainTool
 from typing import Any
@@ -202,38 +202,66 @@ class AnthropicLMInvoker(BaseLMInvoker):
     Batch processing:
         The `AnthropicLMInvoker` supports batch processing, which allows the language model to process multiple
-        requests in a single call. The batch processing operations include:
+        requests in a single call. Batch processing is supported through the `batch` attribute.
+        Usage example:
+        ```python
+        requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
+        results = await lm_invoker.batch.invoke(requests)
+        ```
+        Output example:
+        ```python
+        {
+            "request_1": LMOutput(response="The sky is blue."),
+            "request_2": LMOutput(finish_details={"type": "error", "error": {"message": "...", ...}, ...}),
+        }
+        ```
+        The `AnthropicLMInvoker` also supports the following standalone batch processing operations:
         1. Create a batch job:
-            >>> requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
-            >>> batch_id = await lm_invoker.batch.create(requests)
-            >>> print(batch_id)
-            "batch_123"
+            ```python
+            requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
+            batch_id = await lm_invoker.batch.create(requests)
+            ```
         2. Get the status of a batch job:
-            >>> status = await lm_invoker.batch.status(batch_id)
-            >>> print(status)
-            "finished"
+            ```python
+            status = await lm_invoker.batch.status(batch_id)
+            ```
         3. Retrieve the results of a batch job:
-            >>> results = await lm_invoker.batch.retrieve(batch_id)
-            >>> print(results)
+            ```python
+            results = await lm_invoker.batch.retrieve(batch_id)
+            ```
+            Output example:
+            ```python
             {
                 "request_1": LMOutput(response="The sky is blue."),
                 "request_2": LMOutput(finish_details={"type": "error", "error": {"message": "...", ...}, ...}),
             }
+            ```
         4. List the batch jobs:
-            >>> batch_jobs = await lm_invoker.batch.list()
-            >>> print(batch_jobs)
+            ```python
+            batch_jobs = await lm_invoker.batch.list()
+            ```
+            Output example:
+            ```python
             [
                 {"id": "batch_123", "status": "finished"},
                 {"id": "batch_456", "status": "in_progress"},
                 {"id": "batch_789", "status": "canceling"},
             ]
+            ```
         5. Cancel a batch job:
-            >>> await lm_invoker.batch.cancel(batch_id)
+            ```python
+            await lm_invoker.batch.cancel(batch_id)
+            ```
     Output types:
         The output of the `AnthropicLMInvoker` can either be:
@@ -255,6 +283,7 @@ class AnthropicLMInvoker(BaseLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     client: Incomplete
     thinking: Incomplete

gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi CHANGED Viewed

@@ -26,6 +26,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             for non-reasoning models. If None, the model will perform medium reasoning effort.
         reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
             for non-reasoning models. If None, no summary will be generated.
+        mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling.
         code_interpreter (bool): Whether to enable the code interpreter. Currently not supported.
         web_search (bool): Whether to enable the web search. Currently not supported.
@@ -218,6 +219,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     client: Incomplete
     def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:

gllm_inference/lm_invoker/batch/batch_operations.pyi CHANGED Viewed

@@ -1,28 +1,44 @@
-from gllm_inference.schema import BatchStatus as BatchStatus, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent
+from gllm_inference.exceptions import InvokerRuntimeError as InvokerRuntimeError
+from gllm_inference.schema import BatchStatus as BatchStatus, LMInput as LMInput, LMOutput as LMOutput
 from typing import Any
+DEFAULT_STATUS_CHECK_INTERVAL: float
 class BatchOperations:
     """Handles batch operations for an LM invoker.
     This class provides a wrapper around the batch operations of an LM invoker.
-    It provides a simple interface for creating, retrieving, and canceling batch jobs.
-    This enables LM invokers to support the following batch operations:
-        Create a batch job:
-        >>> batch_id = await lm_invoker.batch.create(...)
-        Get the status of a batch job:
-        >>> status = await lm_invoker.batch.status(batch_id)
-        Retrieve the results of a batch job:
-        >>> results = await lm_invoker.batch.retrieve(batch_id)
-        List the batch jobs:
-        >>> batch_jobs = await lm_invoker.batch.list()
-        Cancel a batch job:
-        >>> await lm_invoker.batch.cancel(batch_id)
+    It provides a simple interface to perform batch invocation:
+    ```python
+    results = await lm_invoker.batch.invoke(...)
+    ```
+    Additionally, it also supports the following standalone batch operations:
+    1. Create a batch job:
+    ```python
+    batch_id = await lm_invoker.batch.create(...)
+    ```
+    2. Get the status of a batch job:
+    ```python
+    status = await lm_invoker.batch.status(batch_id)
+    ```
+    3. Retrieve the results of a batch job:
+    ```python
+    results = await lm_invoker.batch.retrieve(batch_id)
+    ```
+    4. List the batch jobs:
+    ```python
+    batch_jobs = await lm_invoker.batch.list()
+    ```
+    5. Cancel a batch job:
+    ```python
+    await lm_invoker.batch.cancel(batch_id)
+    ```
     """
     def __init__(self, invoker: BaseLMInvoker) -> None:
         """Initializes the batch operations.
@@ -30,12 +46,47 @@ class BatchOperations:
         Args:
             invoker (BaseLMInvoker): The LM invoker to use for the batch operations.
         """
-    async def create(self, requests: dict[str, list[Message] | list[MessageContent] | str], hyperparameters: dict[str, Any] | None = None) -> str:
+    async def invoke(self, requests: dict[str, LMInput], hyperparameters: dict[str, Any] | None = None, status_check_interval: float = ..., max_iterations: int | None = None) -> dict[str, LMOutput]:
+        """Invokes the language model in batch mode.
+        This method orchestrates the entire batch invocation process, including:
+        1. Creating a batch job.
+        2. Iteratively checking the status of the batch job until it is finished.
+        3. Retrieving the results of the batch job.
+        The method includes retry logic with exponential backoff for transient failures.
+        Args:
+            requests (dict[str, LMInput]): The dictionary of requests that maps request ID to the request.
+                Each request must be a valid input for the language model.
+                1. If the request is a list of Message objects, it is used as is.
+                2. If the request is a list of MessageContent or a string, it is converted into a user message.
+            hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.
+                Defaults to None, in which case the default hyperparameters are used.
+            status_check_interval (float, optional): The interval in seconds to check the status of the batch job.
+                Defaults to DEFAULT_STATUS_CHECK_INTERVAL.
+            max_iterations (int | None, optional): The maximum number of iterations to check the status of the batch
+                job. Defaults to None, in which case the number of iterations is infinite.
+        Returns:
+            dict[str, LMOutput]: The results of the batch job.
+        Raises:
+            CancelledError: If the invocation is cancelled.
+            ModelNotFoundError: If the model is not found.
+            ProviderAuthError: If the model authentication fails.
+            ProviderInternalError: If an internal error occurs in the model provider.
+            ProviderInvalidArgsError: If the model parameters are invalid.
+            ProviderOverloadedError: If the model is overloaded.
+            ProviderRateLimitError: If the model rate limit is exceeded.
+            TimeoutError: If the invocation times out.
+            ValueError: If the messages are not in the correct format.
+        """
+    async def create(self, requests: dict[str, LMInput], hyperparameters: dict[str, Any] | None = None) -> str:
         """Creates a new batch job.
         Args:
-            requests (dict[str, list[Message] | list[MessageContent] | str]): The dictionary of requests that maps
-                request ID to the request. Each request must be a valid input for the language model.
+            requests (dict[str, LMInput]): The dictionary of requests that maps request ID to the request.
+                Each request must be a valid input for the language model.
                 1. If the request is a list of Message objects, it is used as is.
                 2. If the request is a list of MessageContent or a string, it is converted into a user message.
             hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.

gllm_inference/lm_invoker/bedrock_lm_invoker.pyi CHANGED Viewed

@@ -179,6 +179,7 @@ class BedrockLMInvoker(BaseLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     session: Incomplete
     client_kwargs: Incomplete

gllm_inference/lm_invoker/datasaur_lm_invoker.pyi CHANGED Viewed

@@ -119,6 +119,7 @@ class DatasaurLMInvoker(OpenAICompatibleLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     client: Incomplete
     citations: Incomplete

gllm_inference/lm_invoker/google_lm_invoker.pyi CHANGED Viewed

@@ -254,6 +254,7 @@ class GoogleLMInvoker(BaseLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     client_params: Incomplete
     thinking: Incomplete

gllm_inference/lm_invoker/langchain_lm_invoker.pyi CHANGED Viewed

@@ -205,6 +205,7 @@ class LangChainLMInvoker(BaseLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     model: Incomplete
     def __init__(self, model: BaseChatModel | None = None, model_class_path: str | None = None, model_name: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:

gllm_inference/lm_invoker/litellm_lm_invoker.pyi CHANGED Viewed

@@ -226,6 +226,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     completion: Incomplete
     def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:

gllm_inference/lm_invoker/lm_invoker.pyi CHANGED Viewed

@@ -7,7 +7,7 @@ from gllm_core.utils import RetryConfig
 from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
 from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
 from gllm_inference.lm_invoker.batch import BatchOperations as BatchOperations
-from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMInput as LMInput, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
 from langchain_core.tools import Tool as LangChainTool
 from typing import Any
@@ -128,7 +128,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
         This method clears the response schema for the language model by calling the `set_response_schema` method with
         None.
         """
-    async def invoke(self, messages: list[Message] | list[MessageContent] | str, hyperparameters: dict[str, Any] | None = None, event_emitter: EventEmitter | None = None) -> str | LMOutput:
+    async def invoke(self, messages: LMInput, hyperparameters: dict[str, Any] | None = None, event_emitter: EventEmitter | None = None) -> str | LMOutput:
         """Invokes the language model.
         This method validates the messages and invokes the language model. It handles both standard
@@ -136,7 +136,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
         The method includes retry logic with exponential backoff for transient failures.
         Args:
-            messages (list[Message] | list[MessageContent] | str): The input messages for the language model.
+            messages (LMInput): The input messages for the language model.
                 1. If a list of Message objects is provided, it is used as is.
                 2. If a list of MessageContent or a string is provided, it is converted into a user message.
             hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.

gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi CHANGED Viewed

@@ -230,6 +230,7 @@ class OpenAICompatibleLMInvoker(BaseLMInvoker):
             2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
                 Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
     '''
     client: Incomplete
     def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:

gllm_inference/lm_invoker/openai_lm_invoker.pyi CHANGED Viewed

@@ -5,7 +5,7 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
 from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.openai import InputType as InputType, Key as Key, OutputType as OutputType, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
-from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, MCPCall as MCPCall, MCPServer as MCPServer, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
 from gllm_inference.utils import validate_string_enum as validate_string_enum
 from langchain_core.tools import Tool as LangChainTool
 from typing import Any
@@ -30,6 +30,7 @@ class OpenAILMInvoker(BaseLMInvoker):
             for non-reasoning models. If None, the model will perform medium reasoning effort.
         reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
             for non-reasoning models. If None, no summary will be generated.
+        mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling.
         code_interpreter (bool): Whether to enable the code interpreter.
         web_search (bool): Whether to enable the web search.
@@ -202,6 +203,46 @@ class OpenAILMInvoker(BaseLMInvoker):
         Setting reasoning-related parameters for non-reasoning models will raise an error.
+    MCP tool calling:
+        The `OpenAILMInvoker` supports MCP tool calling. This feature can be enabled by providing a list of
+        MCP servers to the `mcp_servers` parameter. When MCP servers are provided and the model decides to call
+        an MCP tool, the MCP calls are stored in the `mcp_calls` attribute in the output.
+        Usage example:
+        ```python
+        from gllm_inference.schema import MCPServer
+        mcp_server_1 = MCPServer(
+            url="https://mcp_server_1.com",
+            name="mcp_server_1",
+        )
+        lm_invoker = OpenAILMInvoker(..., mcp_servers=[mcp_server_1])
+        ```
+        Output example:
+        ```python
+        LMOutput(
+            response="The result is 10.",
+            mcp_calls=[
+                MCPCall(
+                    id="123",
+                    server_name="mcp_server_1",
+                    tool_name="mcp_tool_1",
+                    args={"key": "value"},
+                    output="The result is 10.",
+                ),
+            ],
+        )
+        ```
+        When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
+        Streaming output example:
+        ```python
+        {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_list_tools\\"}", ...}\', ...}
+        {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_call\\"}", ...}\', ...}
+        {"type": "response", "value": "The result ", ...}
+        {"type": "response", "value": "is 10.", ...}
+        ```
     Code interpreter:
         The code interpreter is a feature that allows the language model to write and run Python code in a
         sandboxed environment to solve complex problems in domains like data analysis, coding, and math.
@@ -319,9 +360,11 @@ class OpenAILMInvoker(BaseLMInvoker):
                 to cite the relevant sources. Defaults to an empty list.
             2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
                 enabled and the language model decides to execute any codes. Defaults to an empty list.
+            2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
+                decides to invoke MCP tools. Defaults to an empty list.
     '''
     client: Incomplete
-    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
+    def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
         """Initializes a new instance of the OpenAILMInvoker class.
         Args:
@@ -343,6 +386,8 @@ class OpenAILMInvoker(BaseLMInvoker):
                 for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
             reasoning_summary (ReasoningSummary | None, optional): The reasoning summary level for reasoning models.
                 Not allowed for non-reasoning models. If None, no summary will be generated. Defaults to None.
+            mcp_servers (list[MCPServer] | None, optional): The MCP servers containing tools to be accessed by the
+                language model. Defaults to None.
             code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
             web_search (bool, optional): Whether to enable the web search. Defaults to False.

gllm_inference/lm_invoker/schema/openai.pyi CHANGED Viewed

@@ -2,6 +2,8 @@ from enum import StrEnum
 class Key:
     """Defines valid keys in OpenAI."""
+    ALLOWED_TOOLS: str
+    ARGS: str
     ARGUMENTS: str
     CALL_ID: str
     CONTAINER: str
@@ -27,13 +29,18 @@ class Key:
     REASONING: str
     ROLE: str
     SCHEMA: str
+    REQUIRE_APPROVAL: str
     REQUIRED: str
+    SERVER_LABEL: str
+    SERVER_NAME: str
+    SERVER_URL: str
     STATUS: str
     STRICT: str
     SUMMARY: str
     TEXT: str
     TIMEOUT: str
     TITLE: str
+    TOOL_NAME: str
     TOOLS: str
     TYPE: str
@@ -49,10 +56,14 @@ class InputType:
     INPUT_IMAGE: str
     INPUT_TEXT: str
     JSON_SCHEMA: str
+    MCP: str
+    MCP_CALL: str
+    NEVER: str
     NULL: str
     OUTPUT_TEXT: str
     REASONING: str
     SUMMARY_TEXT: str
+    WEB_SEARCH_PREVIEW: str
 class OutputType:
     """Defines valid output types in OpenAI."""
@@ -67,6 +78,8 @@ class OutputType:
     IMAGE: str
     INCOMPLETE: str
     ITEM_DONE: str
+    MCP_CALL: str
+    MCP_LIST_TOOLS: str
     MESSAGE: str
     OPEN_PAGE: str
     REASONING: str

gllm_inference/schema/__init__.pyi CHANGED Viewed

@@ -2,7 +2,9 @@ from gllm_inference.schema.attachment import Attachment as Attachment
 from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
 from gllm_inference.schema.config import TruncationConfig as TruncationConfig
 from gllm_inference.schema.enums import AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, MessageRole as MessageRole, TruncateSide as TruncateSide
+from gllm_inference.schema.lm_input import LMInput as LMInput
 from gllm_inference.schema.lm_output import LMOutput as LMOutput
+from gllm_inference.schema.mcp import MCPCall as MCPCall, MCPServer as MCPServer
 from gllm_inference.schema.message import Message as Message
 from gllm_inference.schema.model_id import ModelId as ModelId, ModelProvider as ModelProvider
 from gllm_inference.schema.reasoning import Reasoning as Reasoning
@@ -11,4 +13,4 @@ from gllm_inference.schema.tool_call import ToolCall as ToolCall
 from gllm_inference.schema.tool_result import ToolResult as ToolResult
 from gllm_inference.schema.type_alias import EMContent as EMContent, MessageContent as MessageContent, ResponseSchema as ResponseSchema, Vector as Vector
-__all__ = ['Attachment', 'AttachmentType', 'BatchStatus', 'CodeExecResult', 'EMContent', 'EmitDataType', 'InputTokenDetails', 'MessageContent', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']
+__all__ = ['Attachment', 'AttachmentType', 'BatchStatus', 'CodeExecResult', 'EMContent', 'EmitDataType', 'MCPCall', 'MCPServer', 'InputTokenDetails', 'MessageContent', 'LMInput', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']

gllm_inference/schema/lm_input.pyi ADDED Viewed

@@ -0,0 +1,4 @@
+from gllm_inference.schema.message import Message as Message
+from gllm_inference.schema.type_alias import MessageContent as MessageContent
+LMInput = list[Message] | list[MessageContent] | str

gllm_inference/schema/lm_output.pyi CHANGED Viewed

@@ -1,5 +1,6 @@
 from gllm_core.schema import Chunk as Chunk
 from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
+from gllm_inference.schema.mcp import MCPCall as MCPCall
 from gllm_inference.schema.reasoning import Reasoning as Reasoning
 from gllm_inference.schema.token_usage import TokenUsage as TokenUsage
 from gllm_inference.schema.tool_call import ToolCall as ToolCall
@@ -24,6 +25,8 @@ class LMOutput(BaseModel):
         citations (list[Chunk]): The citations, if the language model outputs citations. Defaults to an empty list.
         code_exec_results (list[CodeExecResult]): The code execution results, if the language model decides to
             execute code. Defaults to an empty list.
+        mcp_calls (list[MCPCall]): The MCP calls, if the language model decides to invoke MCP tools.
+            Defaults to an empty list.
     """
     response: str
     tool_calls: list[ToolCall]
@@ -34,3 +37,4 @@ class LMOutput(BaseModel):
     reasoning: list[Reasoning]
     citations: list[Chunk]
     code_exec_results: list[CodeExecResult]
+    mcp_calls: list[MCPCall]

gllm_inference/schema/mcp.pyi ADDED Viewed

@@ -0,0 +1,31 @@
+from pydantic import BaseModel
+from typing import Any
+class MCPServer(BaseModel):
+    """Defines an MCP server.
+    Attributes:
+        url (str): The URL of the MCP server.
+        name (str): The name of the MCP server.
+        allowed_tools (list[str] | None): The allowed tools of the MCP server.
+            Defaults to None, in which case all tools are allowed.
+    """
+    url: str
+    name: str
+    allowed_tools: list[str] | None
+class MCPCall(BaseModel):
+    """Defines an MCP call.
+    Attributes:
+        id (str): The ID of the MCP call. Defaults to an empty string.
+        server_name (str): The name of the MCP server. Defaults to an empty string.
+        tool_name (str): The name of the tool. Defaults to an empty string.
+        args (dict[str, Any]): The arguments of the tool. Defaults to an empty dictionary.
+        output (str | None): The output of the tool. Defaults to None.
+    """
+    id: str
+    server_name: str
+    tool_name: str
+    args: dict[str, Any]
+    output: str | None

gllm_inference.cpython-311-darwin.so CHANGED Viewed

Binary file

gllm_inference.pyi CHANGED Viewed

@@ -85,9 +85,9 @@ import gllm_core.schema.tool
 import langchain_core.tools
 import gllm_inference.schema.BatchStatus
 import gllm_inference.schema.EmitDataType
+import gllm_inference.schema.LMInput
 import gllm_inference.schema.LMOutput
 import gllm_inference.schema.Message
-import gllm_inference.schema.MessageContent
 import gllm_inference.schema.Reasoning
 import gllm_inference.schema.ResponseSchema
 import gllm_inference.schema.TokenUsage
@@ -107,8 +107,11 @@ import inspect
 import time
 import jsonschema
 import gllm_inference.lm_invoker.batch.BatchOperations
+import gllm_inference.schema.MessageContent
 import gllm_inference.utils.validate_string_enum
 import gllm_inference.schema.CodeExecResult
+import gllm_inference.schema.MCPCall
+import gllm_inference.schema.MCPServer
 import xai_sdk
 import xai_sdk.chat
 import xai_sdk.search

{gllm_inference_binary-0.5.29.dist-info → gllm_inference_binary-0.5.31.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: gllm-inference-binary
-Version: 0.5.29
+Version: 0.5.31
 Summary: A library containing components related to model inferences in Gen AI applications.
 Author-email: Henry Wicaksono <henry.wicaksono@gdplabs.id>, Resti Febrina <resti.febrina@gdplabs.id>
 Requires-Python: <3.14,>=3.11

{gllm_inference_binary-0.5.29.dist-info → gllm_inference_binary-0.5.31.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-gllm_inference.cpython-311-darwin.so,sha256=HVQ1vMBsfW-XKRrFAYfaocR5Di4I7cpU42hyz_5h2BM,4502344
-gllm_inference.pyi,sha256=iG4pivEnE8Umn48VYXGF3sHauCnmJG7cOA-fYhwggmc,4068
+gllm_inference.cpython-311-darwin.so,sha256=tTzcaJo1-mdRae0c7wnl2-82l43maslSQndtHVp6oc8,4591000
+gllm_inference.pyi,sha256=_kg-gYI4Dx_w13ZGSP-2sC14z7u_GSLeanMYuRkfnZA,4181
 gllm_inference/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gllm_inference/constants.pyi,sha256=EFVMtK3xDK2yjGoHp8EL3LeRZWhIefVKClI9jvbfQQ0,267
 gllm_inference/builder/__init__.pyi,sha256=usz2lvfwO4Yk-ZGKXbCWG1cEr3nlQXxMNDNC-2yc1NM,500
@@ -36,26 +36,26 @@ gllm_inference/exceptions/error_parser.pyi,sha256=IOfa--NpLUW5E9Qq0mwWi6ZpTAbUyy
 gllm_inference/exceptions/exceptions.pyi,sha256=Bv996qLa_vju0Qjf4GewMxdkq8CV9LRZb0S6289DldA,5725
 gllm_inference/exceptions/provider_error_map.pyi,sha256=P1WnhWkM103FW6hqMfNZBOmYSWOmsJtll3VQV8DGb8E,1210
 gllm_inference/lm_invoker/__init__.pyi,sha256=NmQSqObPjevEP1KbbrNnaz4GMh175EVPERZ19vK5Emc,1202
-gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=1uCgSpc2da3JHuk3oZ8nqXX0m0ATUleA81nNn8b3b98,16430
-gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=9gzto0yuZySR_8FII0PzbKLN_bCCdDP2vXQlVwnK9V8,14580
-gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=fAJCLdOMcR4OJpNFj3vN0TiNBOR8PzC1xPvqJDEwlJc,12690
-gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=QS84w3WpD3Oyl5HdxrucsadCmsHE8gn6Ewl3l01DCgI,9203
-gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=LG9lE8IXnObl2Uq9VPLeBT4WRqE5zUV_2gojSHiSqwQ,17052
-gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=NjlxGHZZ-GTZTwz4XviU6a0eKMlwcTXy4wUiCrmnxPQ,13599
-gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=_c56ewpEQ-Ywj5ofFzRYBvQgefR7Q_WkcQt97lnIFgg,13128
-gllm_inference/lm_invoker/lm_invoker.pyi,sha256=zlhvzAs2oWX3vv_HcYpl-0qSRqLZ4Tb020CmI4Oixto,8202
-gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=_hOAde_Faph3JoGYh7zLch6BRc2Lam8PXZvi5-PkL-E,14938
-gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=wPTJr5DkXpoXpxw3MoaqEnzAOUanBRGUu954KdKDaVU,19649
+gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=rJeQ9jpUIvcf5z1BB9Lksqf37ZgUzcnFqDMstOl3-kk,17235
+gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=EXPFTsPwUk42B12MVDhh98maCFSkdPWfqJeht5Wjpq4,14783
+gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=uZ9wpzOKSOvgu1ICMLqEXcrOE3RIbUmqHmgtuwBekPg,12802
+gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=J_tfnIgVDr-zQ7YE5_TKMyZyA336ly04g1l-ZKnr1As,9315
+gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=4-3CwfBcDh6thxkidRcYbGVp9bCDkQTemat6VBHsUC8,17164
+gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=hnQcScOHs31xx4GB6YI-RnREiNg7r8fvQrmGBscQlu0,13711
+gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=eEPvDOCj55f9wJ0neNl4O9XQWvSI6YWJgHZMHOaYGRk,13240
+gllm_inference/lm_invoker/lm_invoker.pyi,sha256=hjolpN8BzUrhgy8MSpnYxhrlWPJO1LXeCFGlBhQ-eBw,8152
+gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=XV-KjulVYAhU0e2giqOdHUGCSCrybXRWsrtzZByqOXI,15050
+gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=PNlhhb_lVk91dNSuha9ZuK6YaRDYVnc94Tbnj3z9wds,21769
 gllm_inference/lm_invoker/xai_lm_invoker.pyi,sha256=rV8D3E730OUmwK7jELKSziMUl7MnpbfxMAvMuq8-Aew,15687
 gllm_inference/lm_invoker/batch/__init__.pyi,sha256=W4W-_yfk7lL20alREJai6GnwuQvdlKRfwQCX4mQK4XI,127
-gllm_inference/lm_invoker/batch/batch_operations.pyi,sha256=Pf_gORe6Oh6cDT_sJhF0h8I7rEsTbwQZMG85NOQw3xQ,2965
+gllm_inference/lm_invoker/batch/batch_operations.pyi,sha256=Oo7hoyPSfPZdy1mXvSdvtRndvq-XTIbPIjEoGvJj5C0,5372
 gllm_inference/lm_invoker/schema/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=6lreMyHKRfZzX5NBYKnQf1Z6RzXBjTvqZj2VbMeaTLQ,1098
 gllm_inference/lm_invoker/schema/bedrock.pyi,sha256=FJLY-ZkkLUYDV48pfsLatnot4ev_xxz9xAayLK28CpU,1027
 gllm_inference/lm_invoker/schema/datasaur.pyi,sha256=aA4DhTXIezwLvFzphR24a5ueVln2FCBIloP9Hbt3iz4,230
 gllm_inference/lm_invoker/schema/google.pyi,sha256=AIsNgq0ZZuicHmx4bL7z6q-946T05nWts3HUeA8hhHQ,505
 gllm_inference/lm_invoker/schema/langchain.pyi,sha256=rZcIxuvABI4pKfyVvkRBRqfJJogZ67EFPydpubHt49c,429
-gllm_inference/lm_invoker/schema/openai.pyi,sha256=9KjOJMnDyPs4hsysD8qFEMObUkbnxp6U9PmRIiUa3h4,1926
+gllm_inference/lm_invoker/schema/openai.pyi,sha256=oju4itbH6mm-yMCqX3m-448XJra4cg6oHHq7abYGM_g,2187
 gllm_inference/lm_invoker/schema/openai_compatible.pyi,sha256=m3bL2hVpxI_crURIi1bGDUqMy1Z5OgKBVU_-BkhX1mg,1166
 gllm_inference/lm_invoker/schema/xai.pyi,sha256=cWnbJmDtllqRH3NXpQbiXgkNBcUXr8ksDSDywcgJebE,632
 gllm_inference/model/__init__.pyi,sha256=qClHIgljqhPPCKlGTKmHsWgYb4_hADybxtC2q1U8a5Q,593
@@ -83,12 +83,14 @@ gllm_inference/prompt_formatter/prompt_formatter.pyi,sha256=UkcPi5ao98OGJyNRsqfh
 gllm_inference/request_processor/__init__.pyi,sha256=hVnfdNZnkTBJHnmLtN3Na4ANP0yK6AstWdIizVr2Apo,227
 gllm_inference/request_processor/lm_request_processor.pyi,sha256=VnYc8E3Iayyhw-rPnGPfTKuO3ohgFsS8HPrZJeyES5I,5889
 gllm_inference/request_processor/uses_lm_mixin.pyi,sha256=Yu0XPNuHxq1tWBviHTPw1oThojneFwGHepvGjBXxKQA,6382
-gllm_inference/schema/__init__.pyi,sha256=6QFARJnD3u8Z9Z3jbmJlH_aFRHYWMmA9naPyhKugOOI,1501
+gllm_inference/schema/__init__.pyi,sha256=Kc0N_kISRf8wkw07tY5ka9wG_0qdZAvrFMej0zxvIZE,1679
 gllm_inference/schema/attachment.pyi,sha256=jApuzjOHJDCz4lr4MlHzBgIndh559nbWu2Xp1fk3hso,3297
 gllm_inference/schema/code_exec_result.pyi,sha256=ZTHh6JtRrPIdQ059P1UAiD2L-tAO1_S5YcMsAXfJ5A0,559
 gllm_inference/schema/config.pyi,sha256=rAL_UeXyQeXVk1P2kqd8vFWOMwmKenfpQLtvMP74t9s,674
 gllm_inference/schema/enums.pyi,sha256=XQpohUC7_9nFdEmSZHj_4YmOAwM_C5jvTWw_RN-JiFk,901
-gllm_inference/schema/lm_output.pyi,sha256=GafJV0KeD-VSwWkwG1oz-uruXrQ7KDZTuoojPCBRpg8,1956
+gllm_inference/schema/lm_input.pyi,sha256=A5pjz1id6tP9XRNhzQrbmzd66C_q3gzo0UP8rCemz6Q,193
+gllm_inference/schema/lm_output.pyi,sha256=15y-M0lpqM_fSlErPKiN1Pj-ikl5NtFBcWLMYsRidt8,2182
+gllm_inference/schema/mcp.pyi,sha256=Vwu8E2BDl6FvvnI42gIyY3Oki1BdwRE3Uh3aV0rmhQU,1014
 gllm_inference/schema/message.pyi,sha256=VP9YppKj2mo1esl9cy6qQO9m2mMHUjTmfGDdyUor880,2220
 gllm_inference/schema/model_id.pyi,sha256=qrr0x4qkd6cGIbc4XATWJb0uckKhd1sAdR_xT7vGIXI,5491
 gllm_inference/schema/reasoning.pyi,sha256=SlTuiDw87GdnAn-I6YOPIJRhEBiwQljM46JohG05guQ,562
@@ -101,7 +103,7 @@ gllm_inference/utils/io_utils.pyi,sha256=7kUTacHAVRYoemFUOjCH7-Qmw-YsQGd6rGYxjf_
 gllm_inference/utils/langchain.pyi,sha256=VluQiHkGigDdqLUbhB6vnXiISCP5hHqV0qokYY6dC1A,1164
 gllm_inference/utils/validation.pyi,sha256=toxBtRp-VItC_X7sNi-GDd7sjibBdWMrR0q01OI2D7k,385
 gllm_inference.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
-gllm_inference_binary-0.5.29.dist-info/METADATA,sha256=Nik4n41nB3IXYTE-OrbmBQRyWBbhiBpciBazCzsy7ts,4857
-gllm_inference_binary-0.5.29.dist-info/WHEEL,sha256=s8TBzVnsSJujxqbMe-G5Vh0IPlslLTnVva4BiQ75Hjo,105
-gllm_inference_binary-0.5.29.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
-gllm_inference_binary-0.5.29.dist-info/RECORD,,
+gllm_inference_binary-0.5.31.dist-info/METADATA,sha256=rBFGyTRcClvhOsldXO2FY68jXOmDDkV-x64jv6Liask,4857
+gllm_inference_binary-0.5.31.dist-info/WHEEL,sha256=s8TBzVnsSJujxqbMe-G5Vh0IPlslLTnVva4BiQ75Hjo,105
+gllm_inference_binary-0.5.31.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
+gllm_inference_binary-0.5.31.dist-info/RECORD,,

{gllm_inference_binary-0.5.29.dist-info → gllm_inference_binary-0.5.31.dist-info}/WHEEL RENAMED Viewed

File without changes

{gllm_inference_binary-0.5.29.dist-info → gllm_inference_binary-0.5.31.dist-info}/top_level.txt RENAMED Viewed

File without changes