gllm-inference-binary 0.5.29__cp313-cp313-macosx_13_0_x86_64.whl → 0.5.31__cp313-cp313-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
5
5
  from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
6
6
  from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
7
7
  from gllm_inference.lm_invoker.schema.anthropic import InputType as InputType, Key as Key, OutputType as OutputType
8
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
8
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMInput as LMInput, LMOutput as LMOutput, Message as Message, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
9
9
  from langchain_core.tools import Tool as LangChainTool
10
10
  from typing import Any
11
11
 
@@ -202,38 +202,66 @@ class AnthropicLMInvoker(BaseLMInvoker):
202
202
 
203
203
  Batch processing:
204
204
  The `AnthropicLMInvoker` supports batch processing, which allows the language model to process multiple
205
- requests in a single call. The batch processing operations include:
205
+ requests in a single call. Batch processing is supported through the `batch` attribute.
206
+
207
+ Usage example:
208
+ ```python
209
+ requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
210
+ results = await lm_invoker.batch.invoke(requests)
211
+ ```
212
+
213
+ Output example:
214
+ ```python
215
+ {
216
+ "request_1": LMOutput(response="The sky is blue."),
217
+ "request_2": LMOutput(finish_details={"type": "error", "error": {"message": "...", ...}, ...}),
218
+ }
219
+ ```
220
+
221
+ The `AnthropicLMInvoker` also supports the following standalone batch processing operations:
206
222
 
207
223
  1. Create a batch job:
208
- >>> requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
209
- >>> batch_id = await lm_invoker.batch.create(requests)
210
- >>> print(batch_id)
211
- "batch_123"
224
+ ```python
225
+ requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
226
+ batch_id = await lm_invoker.batch.create(requests)
227
+ ```
212
228
 
213
229
  2. Get the status of a batch job:
214
- >>> status = await lm_invoker.batch.status(batch_id)
215
- >>> print(status)
216
- "finished"
230
+ ```python
231
+ status = await lm_invoker.batch.status(batch_id)
232
+ ```
217
233
 
218
234
  3. Retrieve the results of a batch job:
219
- >>> results = await lm_invoker.batch.retrieve(batch_id)
220
- >>> print(results)
235
+ ```python
236
+ results = await lm_invoker.batch.retrieve(batch_id)
237
+ ```
238
+
239
+ Output example:
240
+ ```python
221
241
  {
222
242
  "request_1": LMOutput(response="The sky is blue."),
223
243
  "request_2": LMOutput(finish_details={"type": "error", "error": {"message": "...", ...}, ...}),
224
244
  }
245
+ ```
225
246
 
226
247
  4. List the batch jobs:
227
- >>> batch_jobs = await lm_invoker.batch.list()
228
- >>> print(batch_jobs)
248
+ ```python
249
+ batch_jobs = await lm_invoker.batch.list()
250
+ ```
251
+
252
+ Output example:
253
+ ```python
229
254
  [
230
255
  {"id": "batch_123", "status": "finished"},
231
256
  {"id": "batch_456", "status": "in_progress"},
232
257
  {"id": "batch_789", "status": "canceling"},
233
258
  ]
259
+ ```
234
260
 
235
261
  5. Cancel a batch job:
236
- >>> await lm_invoker.batch.cancel(batch_id)
262
+ ```python
263
+ await lm_invoker.batch.cancel(batch_id)
264
+ ```
237
265
 
238
266
  Output types:
239
267
  The output of the `AnthropicLMInvoker` can either be:
@@ -255,6 +283,7 @@ class AnthropicLMInvoker(BaseLMInvoker):
255
283
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
256
284
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
257
285
  Defaults to an empty list.
286
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
258
287
  '''
259
288
  client: Incomplete
260
289
  thinking: Incomplete
@@ -26,6 +26,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
26
26
  for non-reasoning models. If None, the model will perform medium reasoning effort.
27
27
  reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
28
28
  for non-reasoning models. If None, no summary will be generated.
29
+ mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling.
29
30
  code_interpreter (bool): Whether to enable the code interpreter. Currently not supported.
30
31
  web_search (bool): Whether to enable the web search. Currently not supported.
31
32
 
@@ -218,6 +219,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
218
219
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
219
220
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
220
221
  Defaults to an empty list.
222
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
221
223
  '''
222
224
  client: Incomplete
223
225
  def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
@@ -1,28 +1,44 @@
1
- from gllm_inference.schema import BatchStatus as BatchStatus, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent
1
+ from gllm_inference.exceptions import InvokerRuntimeError as InvokerRuntimeError
2
+ from gllm_inference.schema import BatchStatus as BatchStatus, LMInput as LMInput, LMOutput as LMOutput
2
3
  from typing import Any
3
4
 
5
+ DEFAULT_STATUS_CHECK_INTERVAL: float
6
+
4
7
  class BatchOperations:
5
8
  """Handles batch operations for an LM invoker.
6
9
 
7
10
  This class provides a wrapper around the batch operations of an LM invoker.
8
- It provides a simple interface for creating, retrieving, and canceling batch jobs.
9
-
10
- This enables LM invokers to support the following batch operations:
11
-
12
- Create a batch job:
13
- >>> batch_id = await lm_invoker.batch.create(...)
14
-
15
- Get the status of a batch job:
16
- >>> status = await lm_invoker.batch.status(batch_id)
17
-
18
- Retrieve the results of a batch job:
19
- >>> results = await lm_invoker.batch.retrieve(batch_id)
20
-
21
- List the batch jobs:
22
- >>> batch_jobs = await lm_invoker.batch.list()
23
-
24
- Cancel a batch job:
25
- >>> await lm_invoker.batch.cancel(batch_id)
11
+ It provides a simple interface to perform batch invocation:
12
+ ```python
13
+ results = await lm_invoker.batch.invoke(...)
14
+ ```
15
+
16
+ It also supports the following standalone batch operations:
17
+
18
+ 1. Create a batch job:
19
+ ```python
20
+ batch_id = await lm_invoker.batch.create(...)
21
+ ```
22
+
23
+ 2. Get the status of a batch job:
24
+ ```python
25
+ status = await lm_invoker.batch.status(batch_id)
26
+ ```
27
+
28
+ 3. Retrieve the results of a batch job:
29
+ ```python
30
+ results = await lm_invoker.batch.retrieve(batch_id)
31
+ ```
32
+
33
+ 4. List the batch jobs:
34
+ ```python
35
+ batch_jobs = await lm_invoker.batch.list()
36
+ ```
37
+
38
+ 5. Cancel a batch job:
39
+ ```python
40
+ await lm_invoker.batch.cancel(batch_id)
41
+ ```
26
42
  """
27
43
  def __init__(self, invoker: BaseLMInvoker) -> None:
28
44
  """Initializes the batch operations.
@@ -30,12 +46,47 @@ class BatchOperations:
30
46
  Args:
31
47
  invoker (BaseLMInvoker): The LM invoker to use for the batch operations.
32
48
  """
33
- async def create(self, requests: dict[str, list[Message] | list[MessageContent] | str], hyperparameters: dict[str, Any] | None = None) -> str:
49
+ async def invoke(self, requests: dict[str, LMInput], hyperparameters: dict[str, Any] | None = None, status_check_interval: float = ..., max_iterations: int | None = None) -> dict[str, LMOutput]:
50
+ """Invokes the language model in batch mode.
51
+
52
+ This method orchestrates the entire batch invocation process, including:
53
+ 1. Creating a batch job.
54
+ 2. Iteratively checking the status of the batch job until it is finished.
55
+ 3. Retrieving the results of the batch job.
56
+ The method includes retry logic with exponential backoff for transient failures.
57
+
58
+ Args:
59
+ requests (dict[str, LMInput]): The dictionary of requests that maps request ID to the request.
60
+ Each request must be a valid input for the language model.
61
+ 1. If the request is a list of Message objects, it is used as is.
62
+ 2. If the request is a list of MessageContent or a string, it is converted into a user message.
63
+ hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.
64
+ Defaults to None, in which case the default hyperparameters are used.
65
+ status_check_interval (float, optional): The interval in seconds to check the status of the batch job.
66
+ Defaults to DEFAULT_STATUS_CHECK_INTERVAL.
67
+ max_iterations (int | None, optional): The maximum number of iterations to check the status of the batch
68
+ job. Defaults to None, in which case the number of iterations is infinite.
69
+
70
+ Returns:
71
+ dict[str, LMOutput]: The results of the batch job.
72
+
73
+ Raises:
74
+ CancelledError: If the invocation is cancelled.
75
+ ModelNotFoundError: If the model is not found.
76
+ ProviderAuthError: If the model authentication fails.
77
+ ProviderInternalError: If the provider encounters an internal error.
78
+ ProviderInvalidArgsError: If the model parameters are invalid.
79
+ ProviderOverloadedError: If the model is overloaded.
80
+ ProviderRateLimitError: If the model rate limit is exceeded.
81
+ TimeoutError: If the invocation times out.
82
+ ValueError: If the messages are not in the correct format.
83
+ """
84
+ async def create(self, requests: dict[str, LMInput], hyperparameters: dict[str, Any] | None = None) -> str:
34
85
  """Creates a new batch job.
35
86
 
36
87
  Args:
37
- requests (dict[str, list[Message] | list[MessageContent] | str]): The dictionary of requests that maps
38
- request ID to the request. Each request must be a valid input for the language model.
88
+ requests (dict[str, LMInput]): The dictionary of requests that maps request ID to the request.
89
+ Each request must be a valid input for the language model.
39
90
  1. If the request is a list of Message objects, it is used as is.
40
91
  2. If the request is a list of MessageContent or a string, it is converted into a user message.
41
92
  hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.
@@ -179,6 +179,7 @@ class BedrockLMInvoker(BaseLMInvoker):
179
179
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
180
180
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
181
181
  Defaults to an empty list.
182
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
182
183
  '''
183
184
  session: Incomplete
184
185
  client_kwargs: Incomplete
@@ -119,6 +119,7 @@ class DatasaurLMInvoker(OpenAICompatibleLMInvoker):
119
119
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
120
120
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
121
121
  Defaults to an empty list.
122
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
122
123
  '''
123
124
  client: Incomplete
124
125
  citations: Incomplete
@@ -254,6 +254,7 @@ class GoogleLMInvoker(BaseLMInvoker):
254
254
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
255
255
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
256
256
  Defaults to an empty list.
257
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
257
258
  '''
258
259
  client_params: Incomplete
259
260
  thinking: Incomplete
@@ -205,6 +205,7 @@ class LangChainLMInvoker(BaseLMInvoker):
205
205
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
206
206
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
207
207
  Defaults to an empty list.
208
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
208
209
  '''
209
210
  model: Incomplete
210
211
  def __init__(self, model: BaseChatModel | None = None, model_class_path: str | None = None, model_name: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:
@@ -226,6 +226,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
226
226
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
227
227
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
228
228
  Defaults to an empty list.
229
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
229
230
  '''
230
231
  completion: Incomplete
231
232
  def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
@@ -7,7 +7,7 @@ from gllm_core.utils import RetryConfig
7
7
  from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
8
8
  from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
9
9
  from gllm_inference.lm_invoker.batch import BatchOperations as BatchOperations
10
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
10
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMInput as LMInput, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
11
11
  from langchain_core.tools import Tool as LangChainTool
12
12
  from typing import Any
13
13
 
@@ -128,7 +128,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
128
128
  This method clears the response schema for the language model by calling the `set_response_schema` method with
129
129
  None.
130
130
  """
131
- async def invoke(self, messages: list[Message] | list[MessageContent] | str, hyperparameters: dict[str, Any] | None = None, event_emitter: EventEmitter | None = None) -> str | LMOutput:
131
+ async def invoke(self, messages: LMInput, hyperparameters: dict[str, Any] | None = None, event_emitter: EventEmitter | None = None) -> str | LMOutput:
132
132
  """Invokes the language model.
133
133
 
134
134
  This method validates the messages and invokes the language model. It handles both standard
@@ -136,7 +136,7 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
136
136
  The method includes retry logic with exponential backoff for transient failures.
137
137
 
138
138
  Args:
139
- messages (list[Message] | list[MessageContent] | str): The input messages for the language model.
139
+ messages (LMInput): The input messages for the language model.
140
140
  1. If a list of Message objects is provided, it is used as is.
141
141
  2. If a list of MessageContent or a string is provided, it is converted into a user message.
142
142
  hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.
@@ -230,6 +230,7 @@ class OpenAICompatibleLMInvoker(BaseLMInvoker):
230
230
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
231
231
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
232
232
  Defaults to an empty list.
233
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
233
234
  '''
234
235
  client: Incomplete
235
236
  def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
@@ -5,7 +5,7 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
5
5
  from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
6
6
  from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
7
7
  from gllm_inference.lm_invoker.schema.openai import InputType as InputType, Key as Key, OutputType as OutputType, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
8
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
8
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, MCPCall as MCPCall, MCPServer as MCPServer, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
9
9
  from gllm_inference.utils import validate_string_enum as validate_string_enum
10
10
  from langchain_core.tools import Tool as LangChainTool
11
11
  from typing import Any
@@ -30,6 +30,7 @@ class OpenAILMInvoker(BaseLMInvoker):
30
30
  for non-reasoning models. If None, the model will perform medium reasoning effort.
31
31
  reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
32
32
  for non-reasoning models. If None, no summary will be generated.
33
+ mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling.
33
34
  code_interpreter (bool): Whether to enable the code interpreter.
34
35
  web_search (bool): Whether to enable the web search.
35
36
 
@@ -202,6 +203,46 @@ class OpenAILMInvoker(BaseLMInvoker):
202
203
 
203
204
  Setting reasoning-related parameters for non-reasoning models will raise an error.
204
205
 
206
+ MCP tool calling:
207
+ The `OpenAILMInvoker` supports MCP tool calling. This feature can be enabled by providing a list of
208
+ MCP servers to the `mcp_servers` parameter. When MCP servers are provided and the model decides to call
209
+ an MCP tool, the MCP calls are stored in the `mcp_calls` attribute in the output.
210
+
211
+ Usage example:
212
+ ```python
213
+ from gllm_inference.schema import MCPServer
214
+ mcp_server_1 = MCPServer(
215
+ url="https://mcp_server_1.com",
216
+ name="mcp_server_1",
217
+ )
218
+ lm_invoker = OpenAILMInvoker(..., mcp_servers=[mcp_server_1])
219
+ ```
220
+
221
+ Output example:
222
+ ```python
223
+ LMOutput(
224
+ response="The result is 10.",
225
+ mcp_calls=[
226
+ MCPCall(
227
+ id="123",
228
+ server_name="mcp_server_1",
229
+ tool_name="mcp_tool_1",
230
+ args={"key": "value"},
231
+ output="The result is 10.",
232
+ ),
233
+ ],
234
+ )
235
+ ```
236
+
237
+ When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
238
+ Streaming output example:
239
+ ```python
240
+ {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_list_tools\\"}", ...}\', ...}
241
+ {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_call\\"}", ...}\', ...}
242
+ {"type": "response", "value": "The result ", ...}
243
+ {"type": "response", "value": "is 10.", ...}
244
+ ```
245
+
205
246
  Code interpreter:
206
247
  The code interpreter is a feature that allows the language model to write and run Python code in a
207
248
  sandboxed environment to solve complex problems in domains like data analysis, coding, and math.
@@ -319,9 +360,11 @@ class OpenAILMInvoker(BaseLMInvoker):
319
360
  to cite the relevant sources. Defaults to an empty list.
320
361
  2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
321
362
  enabled and the language model decides to execute any code. Defaults to an empty list.
363
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
364
+ decides to invoke MCP tools. Defaults to an empty list.
322
365
  '''
323
366
  client: Incomplete
324
- def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
367
+ def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
325
368
  """Initializes a new instance of the OpenAILMInvoker class.
326
369
 
327
370
  Args:
@@ -343,6 +386,8 @@ class OpenAILMInvoker(BaseLMInvoker):
343
386
  for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
344
387
  reasoning_summary (ReasoningSummary | None, optional): The reasoning summary level for reasoning models.
345
388
  Not allowed for non-reasoning models. If None, no summary will be generated. Defaults to None.
389
+ mcp_servers (list[MCPServer] | None, optional): The MCP servers containing tools to be accessed by the
390
+ language model. Defaults to None.
346
391
  code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
347
392
  web_search (bool, optional): Whether to enable the web search. Defaults to False.
348
393
 
@@ -2,6 +2,8 @@ from enum import StrEnum
2
2
 
3
3
  class Key:
4
4
  """Defines valid keys in OpenAI."""
5
+ ALLOWED_TOOLS: str
6
+ ARGS: str
5
7
  ARGUMENTS: str
6
8
  CALL_ID: str
7
9
  CONTAINER: str
@@ -27,13 +29,18 @@ class Key:
27
29
  REASONING: str
28
30
  ROLE: str
29
31
  SCHEMA: str
32
+ REQUIRE_APPROVAL: str
30
33
  REQUIRED: str
34
+ SERVER_LABEL: str
35
+ SERVER_NAME: str
36
+ SERVER_URL: str
31
37
  STATUS: str
32
38
  STRICT: str
33
39
  SUMMARY: str
34
40
  TEXT: str
35
41
  TIMEOUT: str
36
42
  TITLE: str
43
+ TOOL_NAME: str
37
44
  TOOLS: str
38
45
  TYPE: str
39
46
 
@@ -49,10 +56,14 @@ class InputType:
49
56
  INPUT_IMAGE: str
50
57
  INPUT_TEXT: str
51
58
  JSON_SCHEMA: str
59
+ MCP: str
60
+ MCP_CALL: str
61
+ NEVER: str
52
62
  NULL: str
53
63
  OUTPUT_TEXT: str
54
64
  REASONING: str
55
65
  SUMMARY_TEXT: str
66
+ WEB_SEARCH_PREVIEW: str
56
67
 
57
68
  class OutputType:
58
69
  """Defines valid output types in OpenAI."""
@@ -67,6 +78,8 @@ class OutputType:
67
78
  IMAGE: str
68
79
  INCOMPLETE: str
69
80
  ITEM_DONE: str
81
+ MCP_CALL: str
82
+ MCP_LIST_TOOLS: str
70
83
  MESSAGE: str
71
84
  OPEN_PAGE: str
72
85
  REASONING: str
@@ -2,7 +2,9 @@ from gllm_inference.schema.attachment import Attachment as Attachment
2
2
  from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
3
3
  from gllm_inference.schema.config import TruncationConfig as TruncationConfig
4
4
  from gllm_inference.schema.enums import AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, MessageRole as MessageRole, TruncateSide as TruncateSide
5
+ from gllm_inference.schema.lm_input import LMInput as LMInput
5
6
  from gllm_inference.schema.lm_output import LMOutput as LMOutput
7
+ from gllm_inference.schema.mcp import MCPCall as MCPCall, MCPServer as MCPServer
6
8
  from gllm_inference.schema.message import Message as Message
7
9
  from gllm_inference.schema.model_id import ModelId as ModelId, ModelProvider as ModelProvider
8
10
  from gllm_inference.schema.reasoning import Reasoning as Reasoning
@@ -11,4 +13,4 @@ from gllm_inference.schema.tool_call import ToolCall as ToolCall
11
13
  from gllm_inference.schema.tool_result import ToolResult as ToolResult
12
14
  from gllm_inference.schema.type_alias import EMContent as EMContent, MessageContent as MessageContent, ResponseSchema as ResponseSchema, Vector as Vector
13
15
 
14
- __all__ = ['Attachment', 'AttachmentType', 'BatchStatus', 'CodeExecResult', 'EMContent', 'EmitDataType', 'InputTokenDetails', 'MessageContent', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']
16
+ __all__ = ['Attachment', 'AttachmentType', 'BatchStatus', 'CodeExecResult', 'EMContent', 'EmitDataType', 'MCPCall', 'MCPServer', 'InputTokenDetails', 'MessageContent', 'LMInput', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']
@@ -0,0 +1,4 @@
1
+ from gllm_inference.schema.message import Message as Message
2
+ from gllm_inference.schema.type_alias import MessageContent as MessageContent
3
+
4
+ LMInput = list[Message] | list[MessageContent] | str
@@ -1,5 +1,6 @@
1
1
  from gllm_core.schema import Chunk as Chunk
2
2
  from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
3
+ from gllm_inference.schema.mcp import MCPCall as MCPCall
3
4
  from gllm_inference.schema.reasoning import Reasoning as Reasoning
4
5
  from gllm_inference.schema.token_usage import TokenUsage as TokenUsage
5
6
  from gllm_inference.schema.tool_call import ToolCall as ToolCall
@@ -24,6 +25,8 @@ class LMOutput(BaseModel):
24
25
  citations (list[Chunk]): The citations, if the language model outputs citations. Defaults to an empty list.
25
26
  code_exec_results (list[CodeExecResult]): The code execution results, if the language model decides to
26
27
  execute code. Defaults to an empty list.
28
+ mcp_calls (list[MCPCall]): The MCP calls, if the language model decides to invoke MCP tools.
29
+ Defaults to an empty list.
27
30
  """
28
31
  response: str
29
32
  tool_calls: list[ToolCall]
@@ -34,3 +37,4 @@ class LMOutput(BaseModel):
34
37
  reasoning: list[Reasoning]
35
38
  citations: list[Chunk]
36
39
  code_exec_results: list[CodeExecResult]
40
+ mcp_calls: list[MCPCall]
@@ -0,0 +1,31 @@
1
+ from pydantic import BaseModel
2
+ from typing import Any
3
+
4
+ class MCPServer(BaseModel):
5
+ """Defines an MCP server.
6
+
7
+ Attributes:
8
+ url (str): The URL of the MCP server.
9
+ name (str): The name of the MCP server.
10
+ allowed_tools (list[str] | None): The allowed tools of the MCP server.
11
+ Defaults to None, in which case all tools are allowed.
12
+ """
13
+ url: str
14
+ name: str
15
+ allowed_tools: list[str] | None
16
+
17
+ class MCPCall(BaseModel):
18
+ """Defines an MCP call.
19
+
20
+ Attributes:
21
+ id (str): The ID of the MCP call. Defaults to an empty string.
22
+ server_name (str): The name of the MCP server. Defaults to an empty string.
23
+ tool_name (str): The name of the tool. Defaults to an empty string.
24
+ args (dict[str, Any]): The arguments of the tool. Defaults to an empty dictionary.
25
+ output (str | None): The output of the tool. Defaults to None.
26
+ """
27
+ id: str
28
+ server_name: str
29
+ tool_name: str
30
+ args: dict[str, Any]
31
+ output: str | None
Binary file
gllm_inference.pyi CHANGED
@@ -85,9 +85,9 @@ import gllm_core.schema.tool
85
85
  import langchain_core.tools
86
86
  import gllm_inference.schema.BatchStatus
87
87
  import gllm_inference.schema.EmitDataType
88
+ import gllm_inference.schema.LMInput
88
89
  import gllm_inference.schema.LMOutput
89
90
  import gllm_inference.schema.Message
90
- import gllm_inference.schema.MessageContent
91
91
  import gllm_inference.schema.Reasoning
92
92
  import gllm_inference.schema.ResponseSchema
93
93
  import gllm_inference.schema.TokenUsage
@@ -107,8 +107,11 @@ import inspect
107
107
  import time
108
108
  import jsonschema
109
109
  import gllm_inference.lm_invoker.batch.BatchOperations
110
+ import gllm_inference.schema.MessageContent
110
111
  import gllm_inference.utils.validate_string_enum
111
112
  import gllm_inference.schema.CodeExecResult
113
+ import gllm_inference.schema.MCPCall
114
+ import gllm_inference.schema.MCPServer
112
115
  import xai_sdk
113
116
  import xai_sdk.chat
114
117
  import xai_sdk.search
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gllm-inference-binary
3
- Version: 0.5.29
3
+ Version: 0.5.31
4
4
  Summary: A library containing components related to model inferences in Gen AI applications.
5
5
  Author-email: Henry Wicaksono <henry.wicaksono@gdplabs.id>, Resti Febrina <resti.febrina@gdplabs.id>
6
6
  Requires-Python: <3.14,>=3.11
@@ -1,5 +1,5 @@
1
- gllm_inference.cpython-313-darwin.so,sha256=Qlq-y0XQbpruhXziR7mt3gRhQebszfQxseOgpDYeR_E,4457264
2
- gllm_inference.pyi,sha256=Sq4V7euYsYCTkL7t_C79lI01dUIC8q3nBwMol-Ft8ug,4088
1
+ gllm_inference.cpython-313-darwin.so,sha256=6soqTzLrehpPNyt907Jc00z3ZobOon0X98MSb-6tgtc,4545992
2
+ gllm_inference.pyi,sha256=N4O8vxfP3hurbRgxJ-ZCWM0rdjE10U76ZSL7tRKGuZM,4201
3
3
  gllm_inference/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  gllm_inference/constants.pyi,sha256=EFVMtK3xDK2yjGoHp8EL3LeRZWhIefVKClI9jvbfQQ0,267
5
5
  gllm_inference/builder/__init__.pyi,sha256=usz2lvfwO4Yk-ZGKXbCWG1cEr3nlQXxMNDNC-2yc1NM,500
@@ -36,26 +36,26 @@ gllm_inference/exceptions/error_parser.pyi,sha256=IOfa--NpLUW5E9Qq0mwWi6ZpTAbUyy
36
36
  gllm_inference/exceptions/exceptions.pyi,sha256=Bv996qLa_vju0Qjf4GewMxdkq8CV9LRZb0S6289DldA,5725
37
37
  gllm_inference/exceptions/provider_error_map.pyi,sha256=P1WnhWkM103FW6hqMfNZBOmYSWOmsJtll3VQV8DGb8E,1210
38
38
  gllm_inference/lm_invoker/__init__.pyi,sha256=NmQSqObPjevEP1KbbrNnaz4GMh175EVPERZ19vK5Emc,1202
39
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=1uCgSpc2da3JHuk3oZ8nqXX0m0ATUleA81nNn8b3b98,16430
40
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=9gzto0yuZySR_8FII0PzbKLN_bCCdDP2vXQlVwnK9V8,14580
41
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=fAJCLdOMcR4OJpNFj3vN0TiNBOR8PzC1xPvqJDEwlJc,12690
42
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=QS84w3WpD3Oyl5HdxrucsadCmsHE8gn6Ewl3l01DCgI,9203
43
- gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=LG9lE8IXnObl2Uq9VPLeBT4WRqE5zUV_2gojSHiSqwQ,17052
44
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=NjlxGHZZ-GTZTwz4XviU6a0eKMlwcTXy4wUiCrmnxPQ,13599
45
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=_c56ewpEQ-Ywj5ofFzRYBvQgefR7Q_WkcQt97lnIFgg,13128
46
- gllm_inference/lm_invoker/lm_invoker.pyi,sha256=zlhvzAs2oWX3vv_HcYpl-0qSRqLZ4Tb020CmI4Oixto,8202
47
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=_hOAde_Faph3JoGYh7zLch6BRc2Lam8PXZvi5-PkL-E,14938
48
- gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=wPTJr5DkXpoXpxw3MoaqEnzAOUanBRGUu954KdKDaVU,19649
39
+ gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=rJeQ9jpUIvcf5z1BB9Lksqf37ZgUzcnFqDMstOl3-kk,17235
40
+ gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=EXPFTsPwUk42B12MVDhh98maCFSkdPWfqJeht5Wjpq4,14783
41
+ gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=uZ9wpzOKSOvgu1ICMLqEXcrOE3RIbUmqHmgtuwBekPg,12802
42
+ gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=J_tfnIgVDr-zQ7YE5_TKMyZyA336ly04g1l-ZKnr1As,9315
43
+ gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=4-3CwfBcDh6thxkidRcYbGVp9bCDkQTemat6VBHsUC8,17164
44
+ gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=hnQcScOHs31xx4GB6YI-RnREiNg7r8fvQrmGBscQlu0,13711
45
+ gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=eEPvDOCj55f9wJ0neNl4O9XQWvSI6YWJgHZMHOaYGRk,13240
46
+ gllm_inference/lm_invoker/lm_invoker.pyi,sha256=hjolpN8BzUrhgy8MSpnYxhrlWPJO1LXeCFGlBhQ-eBw,8152
47
+ gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=XV-KjulVYAhU0e2giqOdHUGCSCrybXRWsrtzZByqOXI,15050
48
+ gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=PNlhhb_lVk91dNSuha9ZuK6YaRDYVnc94Tbnj3z9wds,21769
49
49
  gllm_inference/lm_invoker/xai_lm_invoker.pyi,sha256=rV8D3E730OUmwK7jELKSziMUl7MnpbfxMAvMuq8-Aew,15687
50
50
  gllm_inference/lm_invoker/batch/__init__.pyi,sha256=W4W-_yfk7lL20alREJai6GnwuQvdlKRfwQCX4mQK4XI,127
51
- gllm_inference/lm_invoker/batch/batch_operations.pyi,sha256=Pf_gORe6Oh6cDT_sJhF0h8I7rEsTbwQZMG85NOQw3xQ,2965
51
+ gllm_inference/lm_invoker/batch/batch_operations.pyi,sha256=Oo7hoyPSfPZdy1mXvSdvtRndvq-XTIbPIjEoGvJj5C0,5372
52
52
  gllm_inference/lm_invoker/schema/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=6lreMyHKRfZzX5NBYKnQf1Z6RzXBjTvqZj2VbMeaTLQ,1098
54
54
  gllm_inference/lm_invoker/schema/bedrock.pyi,sha256=FJLY-ZkkLUYDV48pfsLatnot4ev_xxz9xAayLK28CpU,1027
55
55
  gllm_inference/lm_invoker/schema/datasaur.pyi,sha256=aA4DhTXIezwLvFzphR24a5ueVln2FCBIloP9Hbt3iz4,230
56
56
  gllm_inference/lm_invoker/schema/google.pyi,sha256=AIsNgq0ZZuicHmx4bL7z6q-946T05nWts3HUeA8hhHQ,505
57
57
  gllm_inference/lm_invoker/schema/langchain.pyi,sha256=rZcIxuvABI4pKfyVvkRBRqfJJogZ67EFPydpubHt49c,429
58
- gllm_inference/lm_invoker/schema/openai.pyi,sha256=9KjOJMnDyPs4hsysD8qFEMObUkbnxp6U9PmRIiUa3h4,1926
58
+ gllm_inference/lm_invoker/schema/openai.pyi,sha256=oju4itbH6mm-yMCqX3m-448XJra4cg6oHHq7abYGM_g,2187
59
59
  gllm_inference/lm_invoker/schema/openai_compatible.pyi,sha256=m3bL2hVpxI_crURIi1bGDUqMy1Z5OgKBVU_-BkhX1mg,1166
60
60
  gllm_inference/lm_invoker/schema/xai.pyi,sha256=cWnbJmDtllqRH3NXpQbiXgkNBcUXr8ksDSDywcgJebE,632
61
61
  gllm_inference/model/__init__.pyi,sha256=qClHIgljqhPPCKlGTKmHsWgYb4_hADybxtC2q1U8a5Q,593
@@ -83,12 +83,14 @@ gllm_inference/prompt_formatter/prompt_formatter.pyi,sha256=UkcPi5ao98OGJyNRsqfh
83
83
  gllm_inference/request_processor/__init__.pyi,sha256=hVnfdNZnkTBJHnmLtN3Na4ANP0yK6AstWdIizVr2Apo,227
84
84
  gllm_inference/request_processor/lm_request_processor.pyi,sha256=VnYc8E3Iayyhw-rPnGPfTKuO3ohgFsS8HPrZJeyES5I,5889
85
85
  gllm_inference/request_processor/uses_lm_mixin.pyi,sha256=Yu0XPNuHxq1tWBviHTPw1oThojneFwGHepvGjBXxKQA,6382
86
- gllm_inference/schema/__init__.pyi,sha256=6QFARJnD3u8Z9Z3jbmJlH_aFRHYWMmA9naPyhKugOOI,1501
86
+ gllm_inference/schema/__init__.pyi,sha256=Kc0N_kISRf8wkw07tY5ka9wG_0qdZAvrFMej0zxvIZE,1679
87
87
  gllm_inference/schema/attachment.pyi,sha256=jApuzjOHJDCz4lr4MlHzBgIndh559nbWu2Xp1fk3hso,3297
88
88
  gllm_inference/schema/code_exec_result.pyi,sha256=ZTHh6JtRrPIdQ059P1UAiD2L-tAO1_S5YcMsAXfJ5A0,559
89
89
  gllm_inference/schema/config.pyi,sha256=rAL_UeXyQeXVk1P2kqd8vFWOMwmKenfpQLtvMP74t9s,674
90
90
  gllm_inference/schema/enums.pyi,sha256=XQpohUC7_9nFdEmSZHj_4YmOAwM_C5jvTWw_RN-JiFk,901
91
- gllm_inference/schema/lm_output.pyi,sha256=GafJV0KeD-VSwWkwG1oz-uruXrQ7KDZTuoojPCBRpg8,1956
91
+ gllm_inference/schema/lm_input.pyi,sha256=A5pjz1id6tP9XRNhzQrbmzd66C_q3gzo0UP8rCemz6Q,193
92
+ gllm_inference/schema/lm_output.pyi,sha256=15y-M0lpqM_fSlErPKiN1Pj-ikl5NtFBcWLMYsRidt8,2182
93
+ gllm_inference/schema/mcp.pyi,sha256=Vwu8E2BDl6FvvnI42gIyY3Oki1BdwRE3Uh3aV0rmhQU,1014
92
94
  gllm_inference/schema/message.pyi,sha256=VP9YppKj2mo1esl9cy6qQO9m2mMHUjTmfGDdyUor880,2220
93
95
  gllm_inference/schema/model_id.pyi,sha256=qrr0x4qkd6cGIbc4XATWJb0uckKhd1sAdR_xT7vGIXI,5491
94
96
  gllm_inference/schema/reasoning.pyi,sha256=SlTuiDw87GdnAn-I6YOPIJRhEBiwQljM46JohG05guQ,562
@@ -101,7 +103,7 @@ gllm_inference/utils/io_utils.pyi,sha256=7kUTacHAVRYoemFUOjCH7-Qmw-YsQGd6rGYxjf_
101
103
  gllm_inference/utils/langchain.pyi,sha256=VluQiHkGigDdqLUbhB6vnXiISCP5hHqV0qokYY6dC1A,1164
102
104
  gllm_inference/utils/validation.pyi,sha256=toxBtRp-VItC_X7sNi-GDd7sjibBdWMrR0q01OI2D7k,385
103
105
  gllm_inference.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
104
- gllm_inference_binary-0.5.29.dist-info/METADATA,sha256=Nik4n41nB3IXYTE-OrbmBQRyWBbhiBpciBazCzsy7ts,4857
105
- gllm_inference_binary-0.5.29.dist-info/WHEEL,sha256=5gFdU0ppHeFSkk7pr7iN0nsQR4LbrW1UDEvLLR-hWrw,105
106
- gllm_inference_binary-0.5.29.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
107
- gllm_inference_binary-0.5.29.dist-info/RECORD,,
106
+ gllm_inference_binary-0.5.31.dist-info/METADATA,sha256=rBFGyTRcClvhOsldXO2FY68jXOmDDkV-x64jv6Liask,4857
107
+ gllm_inference_binary-0.5.31.dist-info/WHEEL,sha256=5gFdU0ppHeFSkk7pr7iN0nsQR4LbrW1UDEvLLR-hWrw,105
108
+ gllm_inference_binary-0.5.31.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
109
+ gllm_inference_binary-0.5.31.dist-info/RECORD,,