gllm-inference-binary 0.5.28__cp311-cp311-macosx_13_0_x86_64.whl → 0.5.30__cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-inference-binary might be problematic. Click here for more details.

@@ -5,13 +5,14 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
5
5
  from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
6
6
  from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
7
7
  from gllm_inference.lm_invoker.schema.anthropic import InputType as InputType, Key as Key, OutputType as OutputType
8
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
8
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
9
9
  from langchain_core.tools import Tool as LangChainTool
10
10
  from typing import Any
11
11
 
12
12
  SUPPORTED_ATTACHMENTS: Incomplete
13
13
  DEFAULT_MAX_TOKENS: int
14
14
  DEFAULT_THINKING_BUDGET: int
15
+ BATCH_STATUS_MAP: Incomplete
15
16
 
16
17
  class AnthropicLMInvoker(BaseLMInvoker):
17
18
  '''A language model invoker to interact with Anthropic language models.
@@ -199,6 +200,41 @@ class AnthropicLMInvoker(BaseLMInvoker):
199
200
  {"type": "response", "value": "is a good dog breed.", ...}
200
201
  ```
201
202
 
203
+ Batch processing:
204
+ The `AnthropicLMInvoker` supports batch processing, which allows the language model to process multiple
205
+ requests in a single call. The batch processing operations include:
206
+
207
+ 1. Create a batch job:
208
+ >>> requests = {"request_1": "What color is the sky?", "request_2": "What color is the grass?"}
209
+ >>> batch_id = await lm_invoker.batch.create(requests)
210
+ >>> print(batch_id)
211
+ "batch_123"
212
+
213
+ 2. Get the status of a batch job:
214
+ >>> status = await lm_invoker.batch.status(batch_id)
215
+ >>> print(status)
216
+ "finished"
217
+
218
+ 3. Retrieve the results of a batch job:
219
+ >>> results = await lm_invoker.batch.retrieve(batch_id)
220
+ >>> print(results)
221
+ {
222
+ "request_1": LMOutput(response="The sky is blue."),
223
+ "request_2": LMOutput(finish_details={"type": "error", "error": {"message": "...", ...}, ...}),
224
+ }
225
+
226
+ 4. List the batch jobs:
227
+ >>> batch_jobs = await lm_invoker.batch.list()
228
+ >>> print(batch_jobs)
229
+ [
230
+ {"id": "batch_123", "status": "finished"},
231
+ {"id": "batch_456", "status": "in_progress"},
232
+ {"id": "batch_789", "status": "canceling"},
233
+ ]
234
+
235
+ 5. Cancel a batch job:
236
+ >>> await lm_invoker.batch.cancel(batch_id)
237
+
202
238
  Output types:
203
239
  The output of the `AnthropicLMInvoker` can either be:
204
240
  1. `str`: The text response if no additional output is needed.
@@ -219,6 +255,7 @@ class AnthropicLMInvoker(BaseLMInvoker):
219
255
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
220
256
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
221
257
  Defaults to an empty list.
258
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
222
259
  '''
223
260
  client: Incomplete
224
261
  thinking: Incomplete
@@ -26,6 +26,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
26
26
  for non-reasoning models. If None, the model will perform medium reasoning effort.
27
27
  reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
28
28
  for non-reasoning models. If None, no summary will be generated.
29
+ mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling. Currently not supported.
29
30
  code_interpreter (bool): Whether to enable the code interpreter. Currently not supported.
30
31
  web_search (bool): Whether to enable the web search. Currently not supported.
31
32
 
@@ -218,6 +219,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
218
219
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
219
220
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
220
221
  Defaults to an empty list.
222
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
221
223
  '''
222
224
  client: Incomplete
223
225
  def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
@@ -0,0 +1,3 @@
1
+ from gllm_inference.lm_invoker.batch.batch_operations import BatchOperations as BatchOperations
2
+
3
+ __all__ = ['BatchOperations']
@@ -0,0 +1,76 @@
1
+ from gllm_inference.schema import BatchStatus as BatchStatus, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent
2
+ from typing import Any
3
+
4
+ class BatchOperations:
5
+ """Handles batch operations for an LM invoker.
6
+
7
+ This class provides a wrapper around the batch operations of an LM invoker.
8
+ It provides a simple interface for creating, checking the status of, retrieving, listing, and canceling batch jobs.
9
+
10
+ This enables LM invokers to support the following batch operations:
11
+
12
+ Create a batch job:
13
+ >>> batch_id = await lm_invoker.batch.create(...)
14
+
15
+ Get the status of a batch job:
16
+ >>> status = await lm_invoker.batch.status(batch_id)
17
+
18
+ Retrieve the results of a batch job:
19
+ >>> results = await lm_invoker.batch.retrieve(batch_id)
20
+
21
+ List the batch jobs:
22
+ >>> batch_jobs = await lm_invoker.batch.list()
23
+
24
+ Cancel a batch job:
25
+ >>> await lm_invoker.batch.cancel(batch_id)
26
+ """
27
+ def __init__(self, invoker: BaseLMInvoker) -> None:
28
+ """Initializes the batch operations.
29
+
30
+ Args:
31
+ invoker (BaseLMInvoker): The LM invoker to use for the batch operations.
32
+ """
33
+ async def create(self, requests: dict[str, list[Message] | list[MessageContent] | str], hyperparameters: dict[str, Any] | None = None) -> str:
34
+ """Creates a new batch job.
35
+
36
+ Args:
37
+ requests (dict[str, list[Message] | list[MessageContent] | str]): The dictionary of requests that maps
38
+ request ID to the request. Each request must be a valid input for the language model.
39
+ 1. If the request is a list of Message objects, it is used as is.
40
+ 2. If the request is a list of MessageContent or a string, it is converted into a user message.
41
+ hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the language model.
42
+ Defaults to None, in which case the default hyperparameters are used.
43
+
44
+ Returns:
45
+ str: The ID of the batch job.
46
+ """
47
+ async def status(self, batch_id: str) -> BatchStatus:
48
+ """Gets the status of a batch job.
49
+
50
+ Args:
51
+ batch_id (str): The ID of the batch job to get the status of.
52
+
53
+ Returns:
54
+ BatchStatus: The status of the batch job.
55
+ """
56
+ async def retrieve(self, batch_id: str) -> dict[str, LMOutput]:
57
+ """Retrieves the results of a batch job.
58
+
59
+ Args:
60
+ batch_id (str): The ID of the batch job to get the results of.
61
+
62
+ Returns:
63
+ dict[str, LMOutput]: The results of the batch job.
64
+ """
65
+ async def list(self) -> list[dict[str, Any]]:
66
+ """Lists the batch jobs.
67
+
68
+ Returns:
69
+ list[dict[str, Any]]: The list of batch jobs.
70
+ """
71
+ async def cancel(self, batch_id: str) -> None:
72
+ """Cancels a batch job.
73
+
74
+ Args:
75
+ batch_id (str): The ID of the batch job to cancel.
76
+ """
@@ -179,6 +179,7 @@ class BedrockLMInvoker(BaseLMInvoker):
179
179
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
180
180
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
181
181
  Defaults to an empty list.
182
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
182
183
  '''
183
184
  session: Incomplete
184
185
  client_kwargs: Incomplete
@@ -119,6 +119,7 @@ class DatasaurLMInvoker(OpenAICompatibleLMInvoker):
119
119
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
120
120
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
121
121
  Defaults to an empty list.
122
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
122
123
  '''
123
124
  client: Incomplete
124
125
  citations: Incomplete
@@ -254,6 +254,7 @@ class GoogleLMInvoker(BaseLMInvoker):
254
254
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
255
255
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
256
256
  Defaults to an empty list.
257
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
257
258
  '''
258
259
  client_params: Incomplete
259
260
  thinking: Incomplete
@@ -205,6 +205,7 @@ class LangChainLMInvoker(BaseLMInvoker):
205
205
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
206
206
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
207
207
  Defaults to an empty list.
208
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
208
209
  '''
209
210
  model: Incomplete
210
211
  def __init__(self, model: BaseChatModel | None = None, model_class_path: str | None = None, model_name: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None) -> None:
@@ -226,6 +226,7 @@ class LiteLLMLMInvoker(OpenAICompatibleLMInvoker):
226
226
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
227
227
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
228
228
  Defaults to an empty list.
229
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
229
230
  '''
230
231
  completion: Incomplete
231
232
  def __init__(self, model_id: str, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
@@ -6,7 +6,8 @@ from gllm_core.schema.tool import Tool
6
6
  from gllm_core.utils import RetryConfig
7
7
  from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
8
8
  from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
9
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
9
+ from gllm_inference.lm_invoker.batch import BatchOperations as BatchOperations
10
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
10
11
  from langchain_core.tools import Tool as LangChainTool
11
12
  from typing import Any
12
13
 
@@ -93,6 +94,13 @@ class BaseLMInvoker(ABC, metaclass=abc.ABCMeta):
93
94
  Returns:
94
95
  str: The name of the language model.
95
96
  """
97
+ @property
98
+ def batch(self) -> BatchOperations:
99
+ """The batch operations for the language model.
100
+
101
+ Returns:
102
+ BatchOperations: The batch operations for the language model.
103
+ """
96
104
  def set_tools(self, tools: list[Tool | LangChainTool]) -> None:
97
105
  """Sets the tools for the language model.
98
106
 
@@ -230,6 +230,7 @@ class OpenAICompatibleLMInvoker(BaseLMInvoker):
230
230
  2.8. citations (list[Chunk]): The citations. Currently not supported. Defaults to an empty list.
231
231
  2.9. code_exec_results (list[CodeExecResult]): The code execution results. Currently not supported.
232
232
  Defaults to an empty list.
233
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls. Currently not supported. Defaults to an empty list.
233
234
  '''
234
235
  client: Incomplete
235
236
  def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None) -> None:
@@ -5,7 +5,7 @@ from gllm_core.utils.retry import RetryConfig as RetryConfig
5
5
  from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
6
6
  from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
7
7
  from gllm_inference.lm_invoker.schema.openai import InputType as InputType, Key as Key, OutputType as OutputType, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
8
- from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
8
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, CodeExecResult as CodeExecResult, EmitDataType as EmitDataType, LMOutput as LMOutput, MCPCall as MCPCall, MCPServer as MCPServer, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
9
9
  from gllm_inference.utils import validate_string_enum as validate_string_enum
10
10
  from langchain_core.tools import Tool as LangChainTool
11
11
  from typing import Any
@@ -30,6 +30,7 @@ class OpenAILMInvoker(BaseLMInvoker):
30
30
  for non-reasoning models. If None, the model will perform medium reasoning effort.
31
31
  reasoning_summary (ReasoningSummary | None): The reasoning summary level for reasoning models. Not allowed
32
32
  for non-reasoning models. If None, no summary will be generated.
33
+ mcp_servers (list[MCPServer]): The list of MCP servers to enable MCP tool calling.
33
34
  code_interpreter (bool): Whether to enable the code interpreter.
34
35
  web_search (bool): Whether to enable the web search.
35
36
 
@@ -202,6 +203,46 @@ class OpenAILMInvoker(BaseLMInvoker):
202
203
 
203
204
  Setting reasoning-related parameters for non-reasoning models will raise an error.
204
205
 
206
+ MCP tool calling:
207
+ The `OpenAILMInvoker` supports MCP tool calling. This feature can be enabled by providing a list of
208
+ MCP servers to the `mcp_servers` parameter. When MCP servers are provided and the model decides to call
209
+ an MCP tool, the MCP calls are stored in the `mcp_calls` attribute in the output.
210
+
211
+ Usage example:
212
+ ```python
213
+ from gllm_inference.schema import MCPServer
214
+ mcp_server_1 = MCPServer(
215
+ url="https://mcp_server_1.com",
216
+ name="mcp_server_1",
217
+ )
218
+ lm_invoker = OpenAILMInvoker(..., mcp_servers=[mcp_server_1])
219
+ ```
220
+
221
+ Output example:
222
+ ```python
223
+ LMOutput(
224
+ response="The result is 10.",
225
+ mcp_calls=[
226
+ MCPCall(
227
+ id="123",
228
+ server_name="mcp_server_1",
229
+ tool_name="mcp_tool_1",
230
+ args={"key": "value"},
231
+ output="The result is 10.",
232
+ ),
233
+ ],
234
+ )
235
+ ```
236
+
237
+ When streaming is enabled, the MCP call activities will be streamed with the `EventType.DATA` event type.
238
+ Streaming output example:
239
+ ```python
240
+ {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_list_tools\\"}", ...}\', ...}
241
+ {"type": "data", "value": \'{"data_type": "activity", "data_value": "{\\"type\\": \\"mcp_call\\"}", ...}\', ...}
242
+ {"type": "response", "value": "The result ", ...}
243
+ {"type": "response", "value": "is 10.", ...}
244
+ ```
245
+
205
246
  Code interpreter:
206
247
  The code interpreter is a feature that allows the language model to write and run Python code in a
207
248
  sandboxed environment to solve complex problems in domains like data analysis, coding, and math.
@@ -319,9 +360,11 @@ class OpenAILMInvoker(BaseLMInvoker):
319
360
  to cite the relevant sources. Defaults to an empty list.
320
361
  2.9. code_exec_results (list[CodeExecResult]): The code execution results, if the code interpreter is
321
362
  enabled and the language model decides to execute any code. Defaults to an empty list.
363
+ 2.10. mcp_calls (list[MCPCall]): The MCP calls, if the MCP servers are provided and the language model
364
+ decides to invoke MCP tools. Defaults to an empty list.
322
365
  '''
323
366
  client: Incomplete
324
- def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
367
+ def __init__(self, model_name: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None, mcp_servers: list[MCPServer] | None = None, code_interpreter: bool = False, web_search: bool = False) -> None:
325
368
  """Initializes a new instance of the OpenAILMInvoker class.
326
369
 
327
370
  Args:
@@ -343,6 +386,8 @@ class OpenAILMInvoker(BaseLMInvoker):
343
386
  for non-reasoning models. If None, the model will perform medium reasoning effort. Defaults to None.
344
387
  reasoning_summary (ReasoningSummary | None, optional): The reasoning summary level for reasoning models.
345
388
  Not allowed for non-reasoning models. If None, no summary will be generated. Defaults to None.
389
+ mcp_servers (list[MCPServer] | None, optional): The MCP servers containing tools to be accessed by the
390
+ language model. Defaults to None.
346
391
  code_interpreter (bool, optional): Whether to enable the code interpreter. Defaults to False.
347
392
  web_search (bool, optional): Whether to enable the web search. Defaults to False.
348
393
 
@@ -15,6 +15,7 @@ class Key:
15
15
  ROLE: str
16
16
  SIGNATURE: str
17
17
  SOURCE: str
18
+ STATUS: str
18
19
  STOP_REASON: str
19
20
  SYSTEM: str
20
21
  TIMEOUT: str
@@ -38,11 +39,16 @@ class InputType:
38
39
 
39
40
  class OutputType:
40
41
  """Defines valid output types in Anthropic."""
42
+ CANCELING: str
41
43
  CONTENT_BLOCK_DELTA: str
42
44
  CONTENT_BLOCK_START: str
43
45
  CONTENT_BLOCK_STOP: str
46
+ ENDED: str
47
+ ERRORED: str
48
+ IN_PROGRESS: str
44
49
  MESSAGE_STOP: str
45
50
  REDACTED_THINKING: str
51
+ SUCCEEDED: str
46
52
  TEXT: str
47
53
  TEXT_DELTA: str
48
54
  THINKING: str
@@ -2,6 +2,8 @@ from enum import StrEnum
2
2
 
3
3
  class Key:
4
4
  """Defines valid keys in OpenAI."""
5
+ ALLOWED_TOOLS: str
6
+ ARGS: str
5
7
  ARGUMENTS: str
6
8
  CALL_ID: str
7
9
  CONTAINER: str
@@ -27,13 +29,18 @@ class Key:
27
29
  REASONING: str
28
30
  ROLE: str
29
31
  SCHEMA: str
32
+ REQUIRE_APPROVAL: str
30
33
  REQUIRED: str
34
+ SERVER_LABEL: str
35
+ SERVER_NAME: str
36
+ SERVER_URL: str
31
37
  STATUS: str
32
38
  STRICT: str
33
39
  SUMMARY: str
34
40
  TEXT: str
35
41
  TIMEOUT: str
36
42
  TITLE: str
43
+ TOOL_NAME: str
37
44
  TOOLS: str
38
45
  TYPE: str
39
46
 
@@ -49,10 +56,14 @@ class InputType:
49
56
  INPUT_IMAGE: str
50
57
  INPUT_TEXT: str
51
58
  JSON_SCHEMA: str
59
+ MCP: str
60
+ MCP_CALL: str
61
+ NEVER: str
52
62
  NULL: str
53
63
  OUTPUT_TEXT: str
54
64
  REASONING: str
55
65
  SUMMARY_TEXT: str
66
+ WEB_SEARCH_PREVIEW: str
56
67
 
57
68
  class OutputType:
58
69
  """Defines valid output types in OpenAI."""
@@ -67,6 +78,8 @@ class OutputType:
67
78
  IMAGE: str
68
79
  INCOMPLETE: str
69
80
  ITEM_DONE: str
81
+ MCP_CALL: str
82
+ MCP_LIST_TOOLS: str
70
83
  MESSAGE: str
71
84
  OPEN_PAGE: str
72
85
  REASONING: str
@@ -1,8 +1,9 @@
1
1
  from gllm_inference.schema.attachment import Attachment as Attachment
2
2
  from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
3
3
  from gllm_inference.schema.config import TruncationConfig as TruncationConfig
4
- from gllm_inference.schema.enums import AttachmentType as AttachmentType, EmitDataType as EmitDataType, MessageRole as MessageRole, TruncateSide as TruncateSide
4
+ from gllm_inference.schema.enums import AttachmentType as AttachmentType, BatchStatus as BatchStatus, EmitDataType as EmitDataType, MessageRole as MessageRole, TruncateSide as TruncateSide
5
5
  from gllm_inference.schema.lm_output import LMOutput as LMOutput
6
+ from gllm_inference.schema.mcp import MCPCall as MCPCall, MCPServer as MCPServer
6
7
  from gllm_inference.schema.message import Message as Message
7
8
  from gllm_inference.schema.model_id import ModelId as ModelId, ModelProvider as ModelProvider
8
9
  from gllm_inference.schema.reasoning import Reasoning as Reasoning
@@ -11,4 +12,4 @@ from gllm_inference.schema.tool_call import ToolCall as ToolCall
11
12
  from gllm_inference.schema.tool_result import ToolResult as ToolResult
12
13
  from gllm_inference.schema.type_alias import EMContent as EMContent, MessageContent as MessageContent, ResponseSchema as ResponseSchema, Vector as Vector
13
14
 
14
- __all__ = ['Attachment', 'AttachmentType', 'CodeExecResult', 'EMContent', 'EmitDataType', 'InputTokenDetails', 'MessageContent', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']
15
+ __all__ = ['Attachment', 'AttachmentType', 'BatchStatus', 'CodeExecResult', 'EMContent', 'EmitDataType', 'MCPCall', 'MCPServer', 'InputTokenDetails', 'MessageContent', 'LMOutput', 'ModelId', 'ModelProvider', 'Message', 'MessageRole', 'OutputTokenDetails', 'Reasoning', 'ResponseSchema', 'TokenUsage', 'ToolCall', 'ToolResult', 'TruncateSide', 'TruncationConfig', 'Vector']
@@ -7,6 +7,13 @@ class AttachmentType(StrEnum):
7
7
  IMAGE = 'image'
8
8
  VIDEO = 'video'
9
9
 
10
+ class BatchStatus(StrEnum):
11
+ """Defines the status of a batch job."""
12
+ CANCELING = 'canceling'
13
+ IN_PROGRESS = 'in_progress'
14
+ FINISHED = 'finished'
15
+ UNKNOWN = 'unknown'
16
+
10
17
  class EmitDataType(StrEnum):
11
18
  """Defines valid data types for emitting events."""
12
19
  ACTIVITY = 'activity'
@@ -1,5 +1,6 @@
1
1
  from gllm_core.schema import Chunk as Chunk
2
2
  from gllm_inference.schema.code_exec_result import CodeExecResult as CodeExecResult
3
+ from gllm_inference.schema.mcp import MCPCall as MCPCall
3
4
  from gllm_inference.schema.reasoning import Reasoning as Reasoning
4
5
  from gllm_inference.schema.token_usage import TokenUsage as TokenUsage
5
6
  from gllm_inference.schema.tool_call import ToolCall as ToolCall
@@ -24,6 +25,8 @@ class LMOutput(BaseModel):
24
25
  citations (list[Chunk]): The citations, if the language model outputs citations. Defaults to an empty list.
25
26
  code_exec_results (list[CodeExecResult]): The code execution results, if the language model decides to
26
27
  execute code. Defaults to an empty list.
28
+ mcp_calls (list[MCPCall]): The MCP calls, if the language model decides to invoke MCP tools.
29
+ Defaults to an empty list.
27
30
  """
28
31
  response: str
29
32
  tool_calls: list[ToolCall]
@@ -34,3 +37,4 @@ class LMOutput(BaseModel):
34
37
  reasoning: list[Reasoning]
35
38
  citations: list[Chunk]
36
39
  code_exec_results: list[CodeExecResult]
40
+ mcp_calls: list[MCPCall]
@@ -0,0 +1,31 @@
1
+ from pydantic import BaseModel
2
+ from typing import Any
3
+
4
+ class MCPServer(BaseModel):
5
+ """Defines an MCP server.
6
+
7
+ Attributes:
8
+ url (str): The URL of the MCP server.
9
+ name (str): The name of the MCP server.
10
+ allowed_tools (list[str] | None): The allowed tools of the MCP server.
11
+ Defaults to None, in which case all tools are allowed.
12
+ """
13
+ url: str
14
+ name: str
15
+ allowed_tools: list[str] | None
16
+
17
+ class MCPCall(BaseModel):
18
+ """Defines an MCP call.
19
+
20
+ Attributes:
21
+ id (str): The ID of the MCP call. Defaults to an empty string.
22
+ server_name (str): The name of the MCP server. Defaults to an empty string.
23
+ tool_name (str): The name of the tool. Defaults to an empty string.
24
+ args (dict[str, Any]): The arguments of the tool. Defaults to an empty dictionary.
25
+ output (str | None): The output of the tool. Defaults to None.
26
+ """
27
+ id: str
28
+ server_name: str
29
+ tool_name: str
30
+ args: dict[str, Any]
31
+ output: str | None
Binary file
gllm_inference.pyi CHANGED
@@ -83,15 +83,21 @@ import gllm_core.event
83
83
  import gllm_core.schema
84
84
  import gllm_core.schema.tool
85
85
  import langchain_core.tools
86
+ import gllm_inference.schema.BatchStatus
86
87
  import gllm_inference.schema.EmitDataType
87
88
  import gllm_inference.schema.LMOutput
88
89
  import gllm_inference.schema.Message
90
+ import gllm_inference.schema.MessageContent
89
91
  import gllm_inference.schema.Reasoning
90
92
  import gllm_inference.schema.ResponseSchema
91
93
  import gllm_inference.schema.TokenUsage
92
94
  import gllm_inference.schema.ToolCall
93
95
  import gllm_inference.schema.ToolResult
94
96
  import anthropic
97
+ import anthropic.types
98
+ import anthropic.types.message_create_params
99
+ import anthropic.types.messages
100
+ import anthropic.types.messages.batch_create_params
95
101
  import gllm_inference.schema.MessageRole
96
102
  import langchain_core.language_models
97
103
  import langchain_core.messages
@@ -100,9 +106,11 @@ import litellm
100
106
  import inspect
101
107
  import time
102
108
  import jsonschema
103
- import gllm_inference.schema.MessageContent
109
+ import gllm_inference.lm_invoker.batch.BatchOperations
104
110
  import gllm_inference.utils.validate_string_enum
105
111
  import gllm_inference.schema.CodeExecResult
112
+ import gllm_inference.schema.MCPCall
113
+ import gllm_inference.schema.MCPServer
106
114
  import xai_sdk
107
115
  import xai_sdk.chat
108
116
  import xai_sdk.search
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gllm-inference-binary
3
- Version: 0.5.28
3
+ Version: 0.5.30
4
4
  Summary: A library containing components related to model inferences in Gen AI applications.
5
5
  Author-email: Henry Wicaksono <henry.wicaksono@gdplabs.id>, Resti Febrina <resti.febrina@gdplabs.id>
6
6
  Requires-Python: <3.14,>=3.11
@@ -23,9 +23,9 @@ Requires-Dist: coverage<8.0.0,>=7.4.4; extra == "dev"
23
23
  Requires-Dist: mypy<2.0.0,>=1.15.0; extra == "dev"
24
24
  Requires-Dist: pre-commit<4.0.0,>=3.7.0; extra == "dev"
25
25
  Requires-Dist: pytest<9.0.0,>=8.1.1; extra == "dev"
26
- Requires-Dist: pytest-asyncio<1.0.0,>=0.23.6; extra == "dev"
26
+ Requires-Dist: pytest-asyncio<0.24.0,>=0.23.6; extra == "dev"
27
27
  Requires-Dist: pytest-cov<6.0.0,>=5.0.0; extra == "dev"
28
- Requires-Dist: ruff<1.0.0,>=0.6.7; extra == "dev"
28
+ Requires-Dist: ruff<0.7.0,>=0.6.7; extra == "dev"
29
29
  Provides-Extra: anthropic
30
30
  Requires-Dist: anthropic<0.61.0,>=0.60.0; extra == "anthropic"
31
31
  Provides-Extra: bedrock
@@ -1,5 +1,5 @@
1
- gllm_inference.cpython-311-darwin.so,sha256=j-rM0TRVqD2j-jldVtDQQss25o1Sotr01WV2N9sAlJg,4389080
2
- gllm_inference.pyi,sha256=VUZT9bvSkuGeID-SO9H55g7PBJohfwPcqWwRwx_nh20,3820
1
+ gllm_inference.cpython-311-darwin.so,sha256=jnpzH6qgNunrQKEWU8ASU6miA_lORXLWDTL-GoPM3JQ,4571520
2
+ gllm_inference.pyi,sha256=F3Sng0iliny70Fkn-isyrhVcB2lsiNMSPxu_BQqnDwQ,4144
3
3
  gllm_inference/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  gllm_inference/constants.pyi,sha256=EFVMtK3xDK2yjGoHp8EL3LeRZWhIefVKClI9jvbfQQ0,267
5
5
  gllm_inference/builder/__init__.pyi,sha256=usz2lvfwO4Yk-ZGKXbCWG1cEr3nlQXxMNDNC-2yc1NM,500
@@ -36,24 +36,26 @@ gllm_inference/exceptions/error_parser.pyi,sha256=IOfa--NpLUW5E9Qq0mwWi6ZpTAbUyy
36
36
  gllm_inference/exceptions/exceptions.pyi,sha256=Bv996qLa_vju0Qjf4GewMxdkq8CV9LRZb0S6289DldA,5725
37
37
  gllm_inference/exceptions/provider_error_map.pyi,sha256=P1WnhWkM103FW6hqMfNZBOmYSWOmsJtll3VQV8DGb8E,1210
38
38
  gllm_inference/lm_invoker/__init__.pyi,sha256=NmQSqObPjevEP1KbbrNnaz4GMh175EVPERZ19vK5Emc,1202
39
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=MsF3OmDo0L9aEHuTJYTgsoDILi2B_IgKtPpDcDMduWc,14925
40
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=9gzto0yuZySR_8FII0PzbKLN_bCCdDP2vXQlVwnK9V8,14580
41
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=fAJCLdOMcR4OJpNFj3vN0TiNBOR8PzC1xPvqJDEwlJc,12690
42
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=QS84w3WpD3Oyl5HdxrucsadCmsHE8gn6Ewl3l01DCgI,9203
43
- gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=LG9lE8IXnObl2Uq9VPLeBT4WRqE5zUV_2gojSHiSqwQ,17052
44
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=NjlxGHZZ-GTZTwz4XviU6a0eKMlwcTXy4wUiCrmnxPQ,13599
45
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=_c56ewpEQ-Ywj5ofFzRYBvQgefR7Q_WkcQt97lnIFgg,13128
46
- gllm_inference/lm_invoker/lm_invoker.pyi,sha256=dQwYtVMCOmqvx68Znr3-pFkeA8upvk5wtRnkbKWyqY4,7881
47
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=_hOAde_Faph3JoGYh7zLch6BRc2Lam8PXZvi5-PkL-E,14938
48
- gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=wPTJr5DkXpoXpxw3MoaqEnzAOUanBRGUu954KdKDaVU,19649
39
+ gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=MXvopJfqv-lBhnFFn01yW-D9Hh3JYL3PiFf3cg-dfp0,16542
40
+ gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=EXPFTsPwUk42B12MVDhh98maCFSkdPWfqJeht5Wjpq4,14783
41
+ gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=uZ9wpzOKSOvgu1ICMLqEXcrOE3RIbUmqHmgtuwBekPg,12802
42
+ gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=J_tfnIgVDr-zQ7YE5_TKMyZyA336ly04g1l-ZKnr1As,9315
43
+ gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=4-3CwfBcDh6thxkidRcYbGVp9bCDkQTemat6VBHsUC8,17164
44
+ gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=hnQcScOHs31xx4GB6YI-RnREiNg7r8fvQrmGBscQlu0,13711
45
+ gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=eEPvDOCj55f9wJ0neNl4O9XQWvSI6YWJgHZMHOaYGRk,13240
46
+ gllm_inference/lm_invoker/lm_invoker.pyi,sha256=zlhvzAs2oWX3vv_HcYpl-0qSRqLZ4Tb020CmI4Oixto,8202
47
+ gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=XV-KjulVYAhU0e2giqOdHUGCSCrybXRWsrtzZByqOXI,15050
48
+ gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=PNlhhb_lVk91dNSuha9ZuK6YaRDYVnc94Tbnj3z9wds,21769
49
49
  gllm_inference/lm_invoker/xai_lm_invoker.pyi,sha256=rV8D3E730OUmwK7jELKSziMUl7MnpbfxMAvMuq8-Aew,15687
50
+ gllm_inference/lm_invoker/batch/__init__.pyi,sha256=W4W-_yfk7lL20alREJai6GnwuQvdlKRfwQCX4mQK4XI,127
51
+ gllm_inference/lm_invoker/batch/batch_operations.pyi,sha256=Pf_gORe6Oh6cDT_sJhF0h8I7rEsTbwQZMG85NOQw3xQ,2965
50
52
  gllm_inference/lm_invoker/schema/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=zNbm4RV454dBEEUUN-Vyl_4cO628wUMPZyrO27O_DfM,991
53
+ gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=6lreMyHKRfZzX5NBYKnQf1Z6RzXBjTvqZj2VbMeaTLQ,1098
52
54
  gllm_inference/lm_invoker/schema/bedrock.pyi,sha256=FJLY-ZkkLUYDV48pfsLatnot4ev_xxz9xAayLK28CpU,1027
53
55
  gllm_inference/lm_invoker/schema/datasaur.pyi,sha256=aA4DhTXIezwLvFzphR24a5ueVln2FCBIloP9Hbt3iz4,230
54
56
  gllm_inference/lm_invoker/schema/google.pyi,sha256=AIsNgq0ZZuicHmx4bL7z6q-946T05nWts3HUeA8hhHQ,505
55
57
  gllm_inference/lm_invoker/schema/langchain.pyi,sha256=rZcIxuvABI4pKfyVvkRBRqfJJogZ67EFPydpubHt49c,429
56
- gllm_inference/lm_invoker/schema/openai.pyi,sha256=9KjOJMnDyPs4hsysD8qFEMObUkbnxp6U9PmRIiUa3h4,1926
58
+ gllm_inference/lm_invoker/schema/openai.pyi,sha256=oju4itbH6mm-yMCqX3m-448XJra4cg6oHHq7abYGM_g,2187
57
59
  gllm_inference/lm_invoker/schema/openai_compatible.pyi,sha256=m3bL2hVpxI_crURIi1bGDUqMy1Z5OgKBVU_-BkhX1mg,1166
58
60
  gllm_inference/lm_invoker/schema/xai.pyi,sha256=cWnbJmDtllqRH3NXpQbiXgkNBcUXr8ksDSDywcgJebE,632
59
61
  gllm_inference/model/__init__.pyi,sha256=qClHIgljqhPPCKlGTKmHsWgYb4_hADybxtC2q1U8a5Q,593
@@ -81,12 +83,13 @@ gllm_inference/prompt_formatter/prompt_formatter.pyi,sha256=UkcPi5ao98OGJyNRsqfh
81
83
  gllm_inference/request_processor/__init__.pyi,sha256=hVnfdNZnkTBJHnmLtN3Na4ANP0yK6AstWdIizVr2Apo,227
82
84
  gllm_inference/request_processor/lm_request_processor.pyi,sha256=VnYc8E3Iayyhw-rPnGPfTKuO3ohgFsS8HPrZJeyES5I,5889
83
85
  gllm_inference/request_processor/uses_lm_mixin.pyi,sha256=Yu0XPNuHxq1tWBviHTPw1oThojneFwGHepvGjBXxKQA,6382
84
- gllm_inference/schema/__init__.pyi,sha256=hsU0GRL6bUdNdix5WDM5Ca-RfmZLu2BdVngSotup-II,1458
86
+ gllm_inference/schema/__init__.pyi,sha256=xKwbHrU4AGN-iSnNoAoHd2SsqnVqmhq7sGv8k61nU5k,1606
85
87
  gllm_inference/schema/attachment.pyi,sha256=jApuzjOHJDCz4lr4MlHzBgIndh559nbWu2Xp1fk3hso,3297
86
88
  gllm_inference/schema/code_exec_result.pyi,sha256=ZTHh6JtRrPIdQ059P1UAiD2L-tAO1_S5YcMsAXfJ5A0,559
87
89
  gllm_inference/schema/config.pyi,sha256=rAL_UeXyQeXVk1P2kqd8vFWOMwmKenfpQLtvMP74t9s,674
88
- gllm_inference/schema/enums.pyi,sha256=w5Bq3m-Ixl4yAd4801APhw9fjCiuqttWuUXWvSWSEEs,717
89
- gllm_inference/schema/lm_output.pyi,sha256=GafJV0KeD-VSwWkwG1oz-uruXrQ7KDZTuoojPCBRpg8,1956
90
+ gllm_inference/schema/enums.pyi,sha256=XQpohUC7_9nFdEmSZHj_4YmOAwM_C5jvTWw_RN-JiFk,901
91
+ gllm_inference/schema/lm_output.pyi,sha256=15y-M0lpqM_fSlErPKiN1Pj-ikl5NtFBcWLMYsRidt8,2182
92
+ gllm_inference/schema/mcp.pyi,sha256=Vwu8E2BDl6FvvnI42gIyY3Oki1BdwRE3Uh3aV0rmhQU,1014
90
93
  gllm_inference/schema/message.pyi,sha256=VP9YppKj2mo1esl9cy6qQO9m2mMHUjTmfGDdyUor880,2220
91
94
  gllm_inference/schema/model_id.pyi,sha256=qrr0x4qkd6cGIbc4XATWJb0uckKhd1sAdR_xT7vGIXI,5491
92
95
  gllm_inference/schema/reasoning.pyi,sha256=SlTuiDw87GdnAn-I6YOPIJRhEBiwQljM46JohG05guQ,562
@@ -99,7 +102,7 @@ gllm_inference/utils/io_utils.pyi,sha256=7kUTacHAVRYoemFUOjCH7-Qmw-YsQGd6rGYxjf_
99
102
  gllm_inference/utils/langchain.pyi,sha256=VluQiHkGigDdqLUbhB6vnXiISCP5hHqV0qokYY6dC1A,1164
100
103
  gllm_inference/utils/validation.pyi,sha256=toxBtRp-VItC_X7sNi-GDd7sjibBdWMrR0q01OI2D7k,385
101
104
  gllm_inference.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
102
- gllm_inference_binary-0.5.28.dist-info/METADATA,sha256=geMzdHIq24oeCxAUWrc-VaNsXHXJlN1Fys5JlzDqI8M,4856
103
- gllm_inference_binary-0.5.28.dist-info/WHEEL,sha256=s8TBzVnsSJujxqbMe-G5Vh0IPlslLTnVva4BiQ75Hjo,105
104
- gllm_inference_binary-0.5.28.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
105
- gllm_inference_binary-0.5.28.dist-info/RECORD,,
105
+ gllm_inference_binary-0.5.30.dist-info/METADATA,sha256=NX8jJe_24A18KRxeJuRZFQuH_JfgUbENb4SRm2jB2B0,4857
106
+ gllm_inference_binary-0.5.30.dist-info/WHEEL,sha256=s8TBzVnsSJujxqbMe-G5Vh0IPlslLTnVva4BiQ75Hjo,105
107
+ gllm_inference_binary-0.5.30.dist-info/top_level.txt,sha256=FpOjtN80F-qVNgbScXSEyqa0w09FYn6301iq6qt69IQ,15
108
+ gllm_inference_binary-0.5.30.dist-info/RECORD,,