gllm-inference-binary 0.5.9__cp312-cp312-win_amd64.whl → 0.5.9b1__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gllm-inference-binary has been flagged as potentially problematic.
- gllm_inference/builder/build_em_invoker.pyi +17 -2
- gllm_inference/builder/build_lm_invoker.pyi +13 -2
- gllm_inference/constants.pyi +2 -2
- gllm_inference/em_invoker/__init__.pyi +2 -1
- gllm_inference/em_invoker/azure_openai_em_invoker.pyi +7 -5
- gllm_inference/em_invoker/bedrock_em_invoker.pyi +106 -0
- gllm_inference/em_invoker/em_invoker.pyi +11 -4
- gllm_inference/em_invoker/google_em_invoker.pyi +8 -3
- gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +25 -3
- gllm_inference/em_invoker/langchain_em_invoker.pyi +7 -2
- gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +6 -2
- gllm_inference/em_invoker/openai_em_invoker.pyi +5 -1
- gllm_inference/em_invoker/schema/bedrock.pyi +22 -0
- gllm_inference/em_invoker/schema/google.pyi +2 -0
- gllm_inference/em_invoker/schema/langchain.pyi +1 -0
- gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +5 -3
- gllm_inference/em_invoker/voyage_em_invoker.pyi +5 -2
- gllm_inference/exceptions/__init__.pyi +3 -3
- gllm_inference/exceptions/error_parser.pyi +26 -33
- gllm_inference/exceptions/exceptions.pyi +40 -28
- gllm_inference/exceptions/provider_error_map.pyi +23 -0
- gllm_inference/lm_invoker/__init__.pyi +2 -1
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +12 -13
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +2 -0
- gllm_inference/lm_invoker/google_lm_invoker.pyi +8 -1
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +2 -0
- gllm_inference/lm_invoker/lm_invoker.pyi +7 -6
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +1 -1
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +7 -6
- gllm_inference/lm_invoker/schema/bedrock.pyi +5 -0
- gllm_inference/lm_invoker/schema/langchain.pyi +1 -0
- gllm_inference/lm_invoker/schema/openai.pyi +1 -0
- gllm_inference/lm_invoker/schema/openai_compatible.pyi +4 -0
- gllm_inference/lm_invoker/schema/xai.pyi +31 -0
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +305 -0
- gllm_inference/request_processor/lm_request_processor.pyi +12 -3
- gllm_inference/request_processor/uses_lm_mixin.pyi +109 -29
- gllm_inference/schema/__init__.pyi +5 -4
- gllm_inference/schema/config.pyi +15 -0
- gllm_inference/schema/enums.pyi +5 -0
- gllm_inference/schema/model_id.pyi +10 -1
- gllm_inference/schema/token_usage.pyi +66 -2
- gllm_inference/schema/type_alias.pyi +1 -5
- gllm_inference/utils/__init__.pyi +2 -1
- gllm_inference/utils/io_utils.pyi +26 -0
- gllm_inference.cp312-win_amd64.pyd +0 -0
- gllm_inference.pyi +25 -12
- {gllm_inference_binary-0.5.9.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/METADATA +71 -108
- {gllm_inference_binary-0.5.9.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/RECORD +51 -43
- {gllm_inference_binary-0.5.9.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/WHEEL +2 -1
- gllm_inference_binary-0.5.9b1.dist-info/top_level.txt +1 -0
gllm_inference/exceptions/error_parser.pyi

@@ -1,48 +1,41 @@
-from
-from gllm_inference.
-from gllm_inference.exceptions.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, ModelNotFoundError as ModelNotFoundError, ProviderAuthError as ProviderAuthError, ProviderInternalError as ProviderInternalError, ProviderInvalidArgsError as ProviderInvalidArgsError, ProviderOverloadedError as ProviderOverloadedError, ProviderRateLimitError as ProviderRateLimitError
-from gllm_inference.schema import ErrorResponse as ErrorResponse
+from gllm_inference.exceptions.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError
+from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, HTTP_STATUS_TO_EXCEPTION_MAP as HTTP_STATUS_TO_EXCEPTION_MAP
 from typing import Any
 
-
-"""
+def build_debug_info(error: Any, class_name: str) -> dict[str, Any]:
+    """Build debug information for an error.
 
-
-
-
-SERVICE_OVERLOADED = 529
-
-HTTP_STATUS_TO_EXCEPTION_MAP: dict[int, type[BaseInvokerError]]
+    Args:
+        error (Any): The error to extract debug information from.
+        class_name (str): The name of the class that raised the error.
 
-
-
+    Returns:
+        dict[str, Any]: A dictionary containing debug information about the error.
+    """
+def convert_http_status_to_base_invoker_error(error: Exception, invoker: BaseEMInvoker | BaseLMInvoker, status_code_extractor: callable = None, provider_error_mapping: dict[str, type[BaseInvokerError]] = ...) -> BaseInvokerError:
+    """Extract provider error with HTTP status code fallback pattern.
 
-This function
-
-
+    This function implements the common pattern used by Bedrock and Google invokers
+    where they first try to extract HTTP status codes, then fall back to provider-specific
+    error mappings based on exception keys.
 
     Args:
-
+        error (Exception): The error to convert.
+        invoker (BaseEMInvoker | BaseLMInvoker): The invoker instance that raised the error.
+        status_code_extractor (callable): Function to extract status code from error.
+        provider_error_mapping (dict): Provider-specific error mapping dictionary.
 
     Returns:
-
-
-def
-"""
-
-    This function analyzes the error message and HTTP status code to determine
-    the most appropriate exception type to return.
+        BaseInvokerError: The converted error.
+    """
+def convert_to_base_invoker_error(error: Exception, invoker: BaseEMInvoker | BaseLMInvoker) -> BaseInvokerError:
+    """Convert provider error into BaseInvokerError.
 
     Args:
-
-
-        Can be an Exception object, Response object, ClientResponse object, string, or dict
-        that might contain HTTP status information.
+        error (Exception): The error to convert.
+        invoker (BaseEMInvoker | BaseLMInvoker): The invoker instance that raised the error.
 
     Returns:
-        BaseInvokerError: The
+        BaseInvokerError: The converted error.
 
-    Raises:
-        CancelledError: If the original error is a CancelledError.
-        TimeoutError: If the original error is a TimeoutError.
     """
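The rewritten parser stub exposes three typed helpers in place of the old module-level constants (now moved to `provider_error_map.pyi`). Below is a minimal sketch of the fallback pattern the `convert_http_status_to_base_invoker_error` docstring describes; the table contents and the name-based matching rule are illustrative assumptions, since the real logic is compiled into the `.pyd` binary:

```python
from http import HTTPStatus

from gllm_inference.exceptions import (
    InvokerRuntimeError,
    ModelNotFoundError,
    ProviderInternalError,
    ProviderRateLimitError,
    build_debug_info,
)

# Hypothetical stand-in for HTTP_STATUS_TO_EXCEPTION_MAP; only its type
# (dict[int, type[BaseInvokerError]]) is visible in the stub.
STATUS_MAP = {
    HTTPStatus.NOT_FOUND: ModelNotFoundError,                  # 404
    HTTPStatus.TOO_MANY_REQUESTS: ProviderRateLimitError,      # 429
    HTTPStatus.INTERNAL_SERVER_ERROR: ProviderInternalError,   # 500
}

def convert_sketch(error, invoker, status_code_extractor=None, provider_error_mapping=None):
    """Sketch: try the HTTP status first, then fall back to provider-specific keys."""
    status = status_code_extractor(error) if status_code_extractor else getattr(error, "status_code", None)
    exc_cls = STATUS_MAP.get(status)
    if exc_cls is None and provider_error_mapping:
        # Assumed fallback: match the provider exception's class name against the mapping keys.
        exc_cls = provider_error_mapping.get(type(error).__name__)
    exc_cls = exc_cls or InvokerRuntimeError
    class_name = type(invoker).__name__
    return exc_cls(class_name, debug_info=build_debug_info(error, class_name))
```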
gllm_inference/exceptions/exceptions.pyi

@@ -22,10 +22,7 @@ class BaseInvokerError(Exception):
     """
 
 class ProviderInvalidArgsError(BaseInvokerError):
-    """Exception for bad or malformed requests, invalid parameters or structure.
-
-    Corresponds to HTTP 400 status code.
-    """
+    """Exception for bad or malformed requests, invalid parameters or structure."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderInvalidArgsError.
 
@@ -36,10 +33,7 @@ class ProviderInvalidArgsError(BaseInvokerError):
         """
 
 class ProviderAuthError(BaseInvokerError):
-    """Exception for authorization failures due to API key issues.
-
-    Corresponds to HTTP 401-403 status codes.
-    """
+    """Exception for authorization failures due to API key issues."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderAuthError.
 
@@ -50,10 +44,7 @@ class ProviderAuthError(BaseInvokerError):
         """
 
 class ProviderRateLimitError(BaseInvokerError):
-    """Exception for rate limit violations.
-
-    Corresponds to HTTP 429 status code.
-    """
+    """Exception for rate limit violations."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderRateLimitError.
 
@@ -64,10 +55,7 @@ class ProviderRateLimitError(BaseInvokerError):
         """
 
 class ProviderInternalError(BaseInvokerError):
-    """Exception for unexpected server-side errors.
-
-    Corresponds to HTTP 500 status code.
-    """
+    """Exception for unexpected server-side errors."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderInternalError.
 
@@ -78,10 +66,7 @@ class ProviderInternalError(BaseInvokerError):
         """
 
 class ProviderOverloadedError(BaseInvokerError):
-    """Exception for when the engine is currently overloaded.
-
-    Corresponds to HTTP 503, 529 status codes.
-    """
+    """Exception for when the engine is currently overloaded."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderOverloadedError.
 
@@ -92,10 +77,7 @@ class ProviderOverloadedError(BaseInvokerError):
         """
 
 class ModelNotFoundError(BaseInvokerError):
-    """Exception for model not found errors.
-
-    Corresponds to HTTP 404 status code.
-    """
+    """Exception for model not found errors."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ModelNotFoundError.
 
@@ -105,11 +87,41 @@ class ModelNotFoundError(BaseInvokerError):
                 Defaults to None.
         """
 
-class
-    """Exception for
+class APIConnectionError(BaseInvokerError):
+    """Exception for when the client fails to connect to the model provider."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize APIConnectionError.
+
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class APITimeoutError(BaseInvokerError):
+    """Exception for when the request to the model provider times out."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize APITimeoutError.
 
-
-
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class ProviderConflictError(BaseInvokerError):
+    """Exception for when the request to the model provider conflicts."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize ProviderConflictError.
+
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class InvokerRuntimeError(BaseInvokerError):
+    """Exception for runtime errors that occur during the invocation of the model."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize the InvokerRuntimeError.
 
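For callers, the net effect is three new catchable classes plus `InvokerRuntimeError`, all derived from `BaseInvokerError`. A usage sketch, assuming these classes are re-exported from `gllm_inference.exceptions` (the +3 -3 change to `exceptions/__init__.pyi` suggests they are):

```python
from gllm_inference.exceptions import (
    APIConnectionError,
    APITimeoutError,
    BaseInvokerError,
    ProviderConflictError,
)

async def invoke_once(lm_invoker, prompt: str):
    """Route the new error types; `lm_invoker` is any invoker from this package."""
    try:
        return await lm_invoker.invoke(prompt)
    except (APIConnectionError, APITimeoutError) as err:
        # Transport-level failures: usually safe to retry the same request.
        print(f"transient failure, retry may help: {err}")
        raise
    except ProviderConflictError:
        # Conflicting request state; retrying the identical call rarely helps.
        raise
    except BaseInvokerError as err:
        # Every provider failure derives from BaseInvokerError, so one handler suffices.
        print(f"invocation failed: {err}")
        raise
```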
gllm_inference/exceptions/provider_error_map.pyi (new file)

@@ -0,0 +1,23 @@
+from _typeshed import Incomplete
+from enum import IntEnum
+from gllm_inference.exceptions.exceptions import APIConnectionError as APIConnectionError, APITimeoutError as APITimeoutError, BaseInvokerError as BaseInvokerError, ModelNotFoundError as ModelNotFoundError, ProviderAuthError as ProviderAuthError, ProviderConflictError as ProviderConflictError, ProviderInternalError as ProviderInternalError, ProviderInvalidArgsError as ProviderInvalidArgsError, ProviderOverloadedError as ProviderOverloadedError, ProviderRateLimitError as ProviderRateLimitError
+
+class ExtendedHTTPStatus(IntEnum):
+    """HTTP status codes outside of the standard HTTPStatus enum.
+
+    Attributes:
+        SERVICE_OVERLOADED (int): HTTP status code for service overloaded.
+    """
+    SERVICE_OVERLOADED = 529
+
+HTTP_STATUS_TO_EXCEPTION_MAP: dict[int, type[BaseInvokerError]]
+ANTHROPIC_ERROR_MAPPING: Incomplete
+BEDROCK_ERROR_MAPPING: Incomplete
+GOOGLE_ERROR_MAPPING: Incomplete
+LANGCHAIN_ERROR_CODE_MAPPING: Incomplete
+LITELLM_ERROR_MAPPING: Incomplete
+OPENAI_ERROR_MAPPING: Incomplete
+TWELVELABS_ERROR_MAPPING: Incomplete
+VOYAGE_ERROR_MAPPING: Incomplete
+GRPC_STATUS_CODE_MAPPING: Incomplete
+ALL_PROVIDER_ERROR_MAPPINGS: Incomplete
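`ExtendedHTTPStatus` exists because 529 is missing from `http.HTTPStatus`. Since `IntEnum` members hash and compare like plain ints, a single lookup table can mix standard and extended codes; a self-contained illustration (the string values stand in for the compiled exception classes):

```python
from enum import IntEnum
from http import HTTPStatus

class ExtendedHTTPStatus(IntEnum):
    """Mirrors the stub: codes missing from the standard HTTPStatus enum."""
    SERVICE_OVERLOADED = 529

# Illustrative map mixing standard and extended codes; the real
# HTTP_STATUS_TO_EXCEPTION_MAP values are compiled into the binary.
overload_codes = {
    HTTPStatus.SERVICE_UNAVAILABLE: "ProviderOverloadedError",        # 503
    ExtendedHTTPStatus.SERVICE_OVERLOADED: "ProviderOverloadedError", # 529
}

assert overload_codes[529] == "ProviderOverloadedError"  # plain-int lookup just works
```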
gllm_inference/lm_invoker/__init__.pyi

@@ -7,5 +7,6 @@ from gllm_inference.lm_invoker.langchain_lm_invoker import LangChainLMInvoker as
 from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker as LiteLLMLMInvoker
 from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker as OpenAICompatibleLMInvoker
 from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker
+from gllm_inference.lm_invoker.xai_lm_invoker import XAILMInvoker as XAILMInvoker
 
-__all__ = ['AnthropicLMInvoker', 'AzureOpenAILMInvoker', 'BedrockLMInvoker', 'DatasaurLMInvoker', 'GoogleLMInvoker', 'LangChainLMInvoker', 'LiteLLMLMInvoker', 'OpenAICompatibleLMInvoker', 'OpenAILMInvoker']
+__all__ = ['AnthropicLMInvoker', 'AzureOpenAILMInvoker', 'BedrockLMInvoker', 'DatasaurLMInvoker', 'GoogleLMInvoker', 'LangChainLMInvoker', 'LiteLLMLMInvoker', 'OpenAICompatibleLMInvoker', 'OpenAILMInvoker', 'XAILMInvoker']
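With this export, `XAILMInvoker` becomes importable from the subpackage root. The constructor below is a guess for illustration only; the real signature lives in the new 305-line `xai_lm_invoker.pyi`, which this diff does not show:

```python
import asyncio

from gllm_inference.lm_invoker import XAILMInvoker

async def main():
    # Hypothetical arguments; consult xai_lm_invoker.pyi for the actual signature.
    lm_invoker = XAILMInvoker(model_name="<your-xai-model>", api_key="<your-xai-api-key>")
    # The string-prompt invoke() call matches the other invokers' documented usage.
    result = await lm_invoker.invoke("Hi there!")
    print(result)

asyncio.run(main())
```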
gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi

@@ -1,7 +1,7 @@
 from _typeshed import Incomplete
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
-from gllm_inference.constants import
+from gllm_inference.constants import AZURE_OPENAI_URL_SUFFIX as AZURE_OPENAI_URL_SUFFIX, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
 from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
 from gllm_inference.lm_invoker.schema.openai import Key as Key
 from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema

@@ -33,7 +33,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
     The `AzureOpenAILMInvoker` can be used as follows:
     ```python
     lm_invoker = AzureOpenAILMInvoker(
-        azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/",
+        azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
         azure_deployment="<your-azure-openai-deployment>",
     )
     result = await lm_invoker.invoke("Hi there!")

@@ -158,17 +158,17 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
     ```
 
     Reasoning:
-        Azure OpenAI\'s o-series models are classified as reasoning models. Reasoning models think
-        producing a long internal chain of thought before responding to the user. Reasoning models
-        complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
+        Azure OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think
+        before they answer, producing a long internal chain of thought before responding to the user. Reasoning models
+        excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
 
         The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
-        will guide the models on how many reasoning tokens it should generate before creating a response
+        will guide the models on how many reasoning tokens it should generate before creating a response.
         Available options include:
-        1. "
-        2. "
-        3. "
-
+        1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
+        2. "low": Favors speed and economical token usage.
+        3. "medium": Favors a balance between speed and reasoning accuracy.
+        4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
 
         Azure OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
         generated. The summary level can be set via the `reasoning_summary` parameter. Available options include:

@@ -220,7 +220,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             Defaults to an empty list.
     '''
     client: Incomplete
-    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str =
+    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
         """Initializes a new instance of the AzureOpenAILMInvoker class.
 
         Args:

@@ -228,8 +228,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             azure_deployment (str): The deployment name of the Azure OpenAI service.
             api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
                 which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
-            api_version (str, optional):
-                `DEFAULT_AZURE_OPENAI_API_VERSION`.
+            api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
             model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
             default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
                 Defaults to None.
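Putting the two visible changes together, the `/openai/v1` endpoint form and the deprecated `api_version`, construction now looks roughly like this; the placeholders follow the docstring's example, and the `ReasoningEffort` member name is an assumption:

```python
from gllm_inference.lm_invoker import AzureOpenAILMInvoker
from gllm_inference.lm_invoker.openai_lm_invoker import ReasoningEffort

lm_invoker = AzureOpenAILMInvoker(
    azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
    azure_deployment="<your-azure-openai-deployment>",
    # api_version is deprecated (removal planned for v0.6), so it is omitted.
    reasoning_effort=ReasoningEffort.MINIMAL,  # assumed member name; "minimal" requires a GPT-5 model
)
```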
gllm_inference/lm_invoker/bedrock_lm_invoker.pyi

@@ -2,6 +2,8 @@ from _typeshed import Incomplete
 from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import BEDROCK_ERROR_MAPPING as BEDROCK_ERROR_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.bedrock import InputType as InputType, Key as Key, OutputType as OutputType
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, LMOutput as LMOutput, Message as Message, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
gllm_inference/lm_invoker/google_lm_invoker.pyi

@@ -3,6 +3,8 @@ from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
 from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES, SECONDS_TO_MILLISECONDS as SECONDS_TO_MILLISECONDS
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import GOOGLE_ERROR_MAPPING as GOOGLE_ERROR_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.google import InputType as InputType, Key as Key
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult

@@ -162,7 +164,12 @@ class GoogleLMInvoker(BaseLMInvoker):
     ```python
     LMOutput(
         response="Golden retriever is a good dog breed.",
-        token_usage=TokenUsage(
+        token_usage=TokenUsage(
+            input_tokens=1500,
+            output_tokens=200,
+            input_token_details=InputTokenDetails(cached_tokens=1200, uncached_tokens=300),
+            output_token_details=OutputTokenDetails(reasoning_tokens=180, response_tokens=20),
+        ),
         duration=0.729,
         finish_details={"finish_reason": "STOP", "finish_message": None},
     )
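The expanded `TokenUsage` example mirrors the +66-line growth of `schema/token_usage.pyi`. A hedged sketch of consuming the new detail fields; the attribute names come from the example above, while the `None` checks are defensive assumptions:

```python
async def report_usage(lm_invoker) -> None:
    # `lm_invoker` is any constructed LM invoker from this package.
    result = await lm_invoker.invoke("What is a good dog breed?")
    usage = result.token_usage
    if usage is not None and usage.input_token_details is not None:
        cached = usage.input_token_details.cached_tokens
        print(f"prompt cache hit rate: {cached / usage.input_tokens:.0%}")
    if usage is not None and usage.output_token_details is not None:
        print(f"reasoning tokens: {usage.output_token_details.reasoning_tokens}")
```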
gllm_inference/lm_invoker/langchain_lm_invoker.pyi

@@ -3,6 +3,8 @@ from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig
 from gllm_inference.constants import INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, build_debug_info as build_debug_info
+from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, LANGCHAIN_ERROR_CODE_MAPPING as LANGCHAIN_ERROR_CODE_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.langchain import InputType as InputType, Key as Key
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
gllm_inference/lm_invoker/lm_invoker.pyi

@@ -5,29 +5,30 @@ from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool
 from gllm_core.utils import RetryConfig
 from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
-from gllm_inference.exceptions import
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
 from langchain_core.tools import Tool as LangChainTool
 from typing import Any
 
-class
+class Key:
     """Defines valid keys in LM invokers JSON schema."""
     ADDITIONAL_PROPERTIES: str
     ANY_OF: str
+    ARGS_SCHEMA: str
+    ARUN: str
+    COROUTINE: str
     DATA_TYPE: str
     DATA_VALUE: str
     DEFAULT: str
     DESCRIPTION: str
-
-    META: str
+    FUNC: str
     NAME: str
-    PARAMETERS: str
     PROPERTIES: str
     REQUIRED: str
     TITLE: str
     TYPE: str
 
-class
+class InputType:
     """Defines valid input types in LM invokers JSON schema."""
     NULL: str
 
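The added `Key` entries (`ARGS_SCHEMA`, `ARUN`, `COROUTINE`, `FUNC`) match attribute names that LangChain tools expose, which suggests the base invoker now introspects both sync and async callables when normalizing tools. For orientation, those attributes on a real `langchain_core` tool (the invoker's own conversion logic is compiled and not shown):

```python
from langchain_core.tools import StructuredTool

def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

async def add_async(a: int, b: int) -> int:
    return a + b

tool = StructuredTool.from_function(func=add, coroutine=add_async)

# Attributes matching the new Key entries:
print(tool.func, tool.coroutine, tool.args_schema)  # FUNC, COROUTINE, ARGS_SCHEMA
print(hasattr(tool, "arun"))                        # ARUN (the async entry point)
```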
gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi

@@ -52,7 +52,7 @@ class OpenAICompatibleLMInvoker(BaseLMInvoker):
     ```
 
     Input types:
-        The `OpenAICompatibleLMInvoker` supports the following input types: text, audio, and image.
+        The `OpenAICompatibleLMInvoker` supports the following input types: text, audio, document, and image.
         Non-text inputs can be passed as an `Attachment` object with the `user` role.
 
     Usage example:
gllm_inference/lm_invoker/openai_lm_invoker.pyi

@@ -159,16 +159,17 @@ class OpenAILMInvoker(BaseLMInvoker):
     ```
 
     Reasoning:
-        OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before
-        producing a long internal chain of thought before responding to the user. Reasoning models
-        complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
+        OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think before
+        they answer, producing a long internal chain of thought before responding to the user. Reasoning models
+        excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
 
         The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
         will guide the models on how many reasoning tokens it should generate before creating a response.
         Available options include:
-        1. "
-        2. "
-        3. "
+        1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
+        2. "low": Favors speed and economical token usage.
+        3. "medium": Favors a balance between speed and reasoning accuracy.
+        4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
         When not set, the reasoning effort will be equivalent to `medium` by default.
 
         OpenAI\'s doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
gllm_inference/lm_invoker/schema/bedrock.pyi

@@ -5,8 +5,11 @@ class Key:
     CONTENT_BLOCK_INDEX: str
     DELTA: str
     DESCRIPTION: str
+    ERROR: str
+    CODE: str
     FORMAT: str
     FUNCTION: str
+    HTTP_STATUS_CODE: str
     INFERENCE_CONFIG: str
     INPUT: str
     INPUT_SCHEMA: str

@@ -14,9 +17,11 @@ class Key:
     JSON: str
     MESSAGE: str
     NAME: str
+    RESPONSE: str
     OUTPUT: str
     OUTPUT_TOKENS: str
     PARAMETERS: str
+    RESPONSE_METADATA: str
     ROLE: str
     SOURCE: str
     START: str
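The added keys (`ERROR`, `CODE`, `HTTP_STATUS_CODE`, `RESPONSE`, `RESPONSE_METADATA`) line up with the shape of botocore error payloads that the new error-mapping imports consume. For orientation, a typical botocore `ClientError` payload; this structure is standard botocore, not taken from this package:

```python
error_response = {
    "Error": {"Code": "ThrottlingException", "Message": "Rate exceeded"},
    "ResponseMetadata": {"HTTPStatusCode": 429},
}

# The kind of lookups the new keys make possible:
status = error_response["ResponseMetadata"]["HTTPStatusCode"]  # -> 429
code = error_response["Error"]["Code"]                         # -> "ThrottlingException"
```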
gllm_inference/lm_invoker/schema/openai_compatible.pyi

@@ -9,6 +9,9 @@ class Key:
     DEFS: str
     DESCRIPTION: str
     EFFORT: str
+    FILE: str
+    FILE_DATA: str
+    FILENAME: str
     FINISH_REASON: str
     FORMAT: str
     FUNCTION: str

@@ -40,6 +43,7 @@ class Key:
 
 class InputType:
     """Defines valid input types in OpenAI compatible models."""
+    FILE: str
     FUNCTION: str
     IMAGE_URL: str
     INPUT_AUDIO: str
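The `FILE`, `FILE_DATA`, and `FILENAME` keys and the new `FILE` input type match the file content part of the OpenAI Chat Completions API, consistent with the document input support noted in the invoker hunk above. The wire shape, per OpenAI's public API (how this package assembles it is compiled and not shown):

```python
import base64

pdf_bytes = b"%PDF-1.4 ..."  # placeholder document bytes

file_part = {
    "type": "file",  # InputType.FILE
    "file": {
        "filename": "report.pdf",
        "file_data": "data:application/pdf;base64," + base64.b64encode(pdf_bytes).decode(),
    },
}
```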
gllm_inference/lm_invoker/schema/xai.pyi (new file)

@@ -0,0 +1,31 @@
+from enum import StrEnum
+
+class Key:
+    """Defines valid keys in xAI."""
+    ARGUMENTS: str
+    CHANNEL_OPTIONS: str
+    CITATIONS: str
+    COMPLETION_TOKENS: str
+    CONTENT: str
+    FINISH_REASON: str
+    FUNCTION: str
+    ID: str
+    NAME: str
+    ON: str
+    PROMPT_TOKENS: str
+    REASONING_CONTENT: str
+    REASONING_EFFORT: str
+    RESPONSE_FORMAT: str
+    SEARCH_PARAMETERS: str
+    TIMEOUT: str
+    TOOL_CALLS: str
+    TOOLS: str
+    TYPE: str
+    URL: str
+    URL_CITATION: str
+    USAGE: str
+
+class ReasoningEffort(StrEnum):
+    """Defines the reasoning effort for reasoning models."""
+    HIGH = 'high'
+    LOW = 'low'