gllm-inference-binary 0.5.8__cp313-cp313-win_amd64.whl → 0.5.9b1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of gllm-inference-binary might be problematic.
Files changed (51)
  1. gllm_inference/builder/build_em_invoker.pyi +17 -2
  2. gllm_inference/builder/build_lm_invoker.pyi +13 -2
  3. gllm_inference/constants.pyi +3 -2
  4. gllm_inference/em_invoker/__init__.pyi +2 -1
  5. gllm_inference/em_invoker/azure_openai_em_invoker.pyi +7 -5
  6. gllm_inference/em_invoker/bedrock_em_invoker.pyi +106 -0
  7. gllm_inference/em_invoker/em_invoker.pyi +11 -4
  8. gllm_inference/em_invoker/google_em_invoker.pyi +9 -4
  9. gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +25 -3
  10. gllm_inference/em_invoker/langchain_em_invoker.pyi +7 -2
  11. gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +6 -2
  12. gllm_inference/em_invoker/openai_em_invoker.pyi +5 -1
  13. gllm_inference/em_invoker/schema/bedrock.pyi +22 -0
  14. gllm_inference/em_invoker/schema/google.pyi +2 -0
  15. gllm_inference/em_invoker/schema/langchain.pyi +1 -0
  16. gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +5 -3
  17. gllm_inference/em_invoker/voyage_em_invoker.pyi +5 -2
  18. gllm_inference/exceptions/__init__.pyi +3 -3
  19. gllm_inference/exceptions/error_parser.pyi +26 -33
  20. gllm_inference/exceptions/exceptions.pyi +40 -28
  21. gllm_inference/exceptions/provider_error_map.pyi +23 -0
  22. gllm_inference/lm_invoker/__init__.pyi +2 -1
  23. gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +12 -13
  24. gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +2 -0
  25. gllm_inference/lm_invoker/google_lm_invoker.pyi +9 -2
  26. gllm_inference/lm_invoker/langchain_lm_invoker.pyi +2 -0
  27. gllm_inference/lm_invoker/lm_invoker.pyi +7 -6
  28. gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +1 -1
  29. gllm_inference/lm_invoker/openai_lm_invoker.pyi +7 -6
  30. gllm_inference/lm_invoker/schema/bedrock.pyi +5 -0
  31. gllm_inference/lm_invoker/schema/langchain.pyi +1 -0
  32. gllm_inference/lm_invoker/schema/openai.pyi +1 -0
  33. gllm_inference/lm_invoker/schema/openai_compatible.pyi +4 -0
  34. gllm_inference/lm_invoker/schema/xai.pyi +31 -0
  35. gllm_inference/lm_invoker/xai_lm_invoker.pyi +305 -0
  36. gllm_inference/request_processor/lm_request_processor.pyi +12 -3
  37. gllm_inference/request_processor/uses_lm_mixin.pyi +109 -29
  38. gllm_inference/schema/__init__.pyi +5 -4
  39. gllm_inference/schema/config.pyi +15 -0
  40. gllm_inference/schema/enums.pyi +5 -0
  41. gllm_inference/schema/model_id.pyi +10 -1
  42. gllm_inference/schema/token_usage.pyi +66 -2
  43. gllm_inference/schema/type_alias.pyi +1 -5
  44. gllm_inference/utils/__init__.pyi +2 -1
  45. gllm_inference/utils/io_utils.pyi +26 -0
  46. gllm_inference.cp313-win_amd64.pyd +0 -0
  47. gllm_inference.pyi +25 -12
  48. {gllm_inference_binary-0.5.8.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/METADATA +71 -108
  49. {gllm_inference_binary-0.5.8.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/RECORD +51 -43
  50. {gllm_inference_binary-0.5.8.dist-info → gllm_inference_binary-0.5.9b1.dist-info}/WHEEL +2 -1
  51. gllm_inference_binary-0.5.9b1.dist-info/top_level.txt +1 -0
gllm_inference/exceptions/error_parser.pyi

@@ -1,48 +1,41 @@
-from enum import IntEnum
-from gllm_inference.constants import HTTP_STATUS_CODE_PATTERNS as HTTP_STATUS_CODE_PATTERNS
-from gllm_inference.exceptions.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, ModelNotFoundError as ModelNotFoundError, ProviderAuthError as ProviderAuthError, ProviderInternalError as ProviderInternalError, ProviderInvalidArgsError as ProviderInvalidArgsError, ProviderOverloadedError as ProviderOverloadedError, ProviderRateLimitError as ProviderRateLimitError
-from gllm_inference.schema import ErrorResponse as ErrorResponse
+from gllm_inference.exceptions.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError
+from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, HTTP_STATUS_TO_EXCEPTION_MAP as HTTP_STATUS_TO_EXCEPTION_MAP
 from typing import Any
 
-class ExtendedHTTPStatus(IntEnum):
-    """HTTP status codes outside of the standard HTTPStatus enum.
+def build_debug_info(error: Any, class_name: str) -> dict[str, Any]:
+    """Build debug information for an error.
 
-    Attributes:
-        SERVICE_OVERLOADED (int): HTTP status code for service overloaded.
-    """
-    SERVICE_OVERLOADED = 529
-
-HTTP_STATUS_TO_EXCEPTION_MAP: dict[int, type[BaseInvokerError]]
+    Args:
+        error (Any): The error to extract debug information from.
+        class_name (str): The name of the class that raised the error.
 
-def extract_http_status_code(response: ErrorResponse) -> int | None:
-    '''Extract HTTP status code from error message.
+    Returns:
+        dict[str, Any]: A dictionary containing debug information about the error.
+    """
+def convert_http_status_to_base_invoker_error(error: Exception, invoker: BaseEMInvoker | BaseLMInvoker, status_code_extractor: callable = None, provider_error_mapping: dict[str, type[BaseInvokerError]] = ...) -> BaseInvokerError:
+    """Extract provider error with HTTP status code fallback pattern.
 
-    This function extracts the HTTP status code from the error message. For example,
-    if the error message is "Error code: 401 - Invalid API key", "HTTP 429 Rate limit exceeded",
-    or "status: 500 Internal server error", the function will return "401", "429", or "500" respectively.
+    This function implements the common pattern used by Bedrock and Google invokers
+    where they first try to extract HTTP status codes, then fall back to provider-specific
+    error mappings based on exception keys.
 
     Args:
-        response (ErrorResponse): The response object or error message containing HTTP status code.
+        error (Exception): The error to convert.
+        invoker (BaseEMInvoker | BaseLMInvoker): The invoker instance that raised the error.
+        status_code_extractor (callable): Function to extract status code from error.
+        provider_error_mapping (dict): Provider-specific error mapping dictionary.
 
     Returns:
-        int | None: The extracted HTTP status code, or None if not found.
-    '''
-def parse_error_message(class_name: str, error: Any) -> BaseInvokerError:
-    """Parse error from different AI providers and return appropriate exception type.
-
-    This function analyzes the error message and HTTP status code to determine
-    the most appropriate exception type to return.
+        BaseInvokerError: The converted error.
+    """
+def convert_to_base_invoker_error(error: Exception, invoker: BaseEMInvoker | BaseLMInvoker) -> BaseInvokerError:
+    """Convert provider error into BaseInvokerError.
 
     Args:
-        class_name (str): Class name to include in the error message for clarity.
-        error (Any): The error object or message from the AI provider.
-            Can be an Exception object, Response object, ClientResponse object, string, or dict
-            that might contain HTTP status information.
+        error (Exception): The error to convert.
+        invoker (BaseEMInvoker | BaseLMInvoker): The invoker instance that raised the error.
 
     Returns:
-        BaseInvokerError: The appropriate exception instance based on error analysis.
+        BaseInvokerError: The converted error.
 
-    Raises:
-        CancelledError: If the original error is a CancelledError.
-        TimeoutError: If the original error is a TimeoutError.
     """
gllm_inference/exceptions/exceptions.pyi

@@ -22,10 +22,7 @@ class BaseInvokerError(Exception):
     """
 
 class ProviderInvalidArgsError(BaseInvokerError):
-    """Exception for bad or malformed requests, invalid parameters or structure.
-
-    Corresponds to HTTP 400 status code.
-    """
+    """Exception for bad or malformed requests, invalid parameters or structure."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderInvalidArgsError.
 
@@ -36,10 +33,7 @@ class ProviderInvalidArgsError(BaseInvokerError):
         """
 
 class ProviderAuthError(BaseInvokerError):
-    """Exception for authorization failures due to API key issues.
-
-    Corresponds to HTTP 401-403 status codes.
-    """
+    """Exception for authorization failures due to API key issues."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderAuthError.
 
@@ -50,10 +44,7 @@ class ProviderAuthError(BaseInvokerError):
         """
 
 class ProviderRateLimitError(BaseInvokerError):
-    """Exception for rate limit violations.
-
-    Corresponds to HTTP 429 status code.
-    """
+    """Exception for rate limit violations."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderRateLimitError.
 
@@ -64,10 +55,7 @@ class ProviderRateLimitError(BaseInvokerError):
         """
 
 class ProviderInternalError(BaseInvokerError):
-    """Exception for unexpected server-side errors.
-
-    Corresponds to HTTP 500 status code.
-    """
+    """Exception for unexpected server-side errors."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderInternalError.
 
@@ -78,10 +66,7 @@ class ProviderInternalError(BaseInvokerError):
         """
 
 class ProviderOverloadedError(BaseInvokerError):
-    """Exception for when the engine is currently overloaded.
-
-    Corresponds to HTTP 503, 529 status codes.
-    """
+    """Exception for when the engine is currently overloaded."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ProviderOverloadedError.
 
@@ -92,10 +77,7 @@ class ProviderOverloadedError(BaseInvokerError):
         """
 
 class ModelNotFoundError(BaseInvokerError):
-    """Exception for model not found errors.
-
-    Corresponds to HTTP 404 status code.
-    """
+    """Exception for model not found errors."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize ModelNotFoundError.
 
@@ -105,11 +87,41 @@ class ModelNotFoundError(BaseInvokerError):
             Defaults to None.
         """
 
-class InvokerRuntimeError(BaseInvokerError):
-    """Exception for runtime errors that occur during the invocation of the model.
+class APIConnectionError(BaseInvokerError):
+    """Exception for when the client fails to connect to the model provider."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize APIConnectionError.
+
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class APITimeoutError(BaseInvokerError):
+    """Exception for when the request to the model provider times out."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize APITimeoutError.
 
-    Corresponds to HTTP status codes other than the ones defined in HTTP_STATUS_TO_EXCEPTION_MAP.
-    """
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class ProviderConflictError(BaseInvokerError):
+    """Exception for when the request to the model provider conflicts."""
+    def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
+        """Initialize ProviderConflictError.
+
+        Args:
+            class_name (str): The name of the class that raised the error.
+            debug_info (dict[str, Any] | None, optional): Additional debug information for developers.
+                Defaults to None.
+        """
+
+class InvokerRuntimeError(BaseInvokerError):
+    """Exception for runtime errors that occur during the invocation of the model."""
     def __init__(self, class_name: str, debug_info: dict[str, Any] | None = None) -> None:
         """Initialize the InvokerRuntimeError.
 
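The three new classes (`APIConnectionError`, `APITimeoutError`, `ProviderConflictError`) let callers separate transport failures from the existing HTTP-mapped errors. A hedged sketch of one way to use the hierarchy; which errors count as retryable is a policy assumption, not documented package behavior:

```python
# Hedged sketch: classifying the expanded exception hierarchy for retries.
from gllm_inference.exceptions.exceptions import (
    APIConnectionError,
    APITimeoutError,
    ProviderRateLimitError,
)

RETRYABLE = (APIConnectionError, APITimeoutError, ProviderRateLimitError)


def is_retryable(error: Exception) -> bool:
    # Connection drops, timeouts, and rate limits are usually transient;
    # conflicts, auth failures, and generic runtime errors usually are not.
    return isinstance(error, RETRYABLE)
```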
gllm_inference/exceptions/provider_error_map.pyi (new file)

@@ -0,0 +1,23 @@
+from _typeshed import Incomplete
+from enum import IntEnum
+from gllm_inference.exceptions.exceptions import APIConnectionError as APIConnectionError, APITimeoutError as APITimeoutError, BaseInvokerError as BaseInvokerError, ModelNotFoundError as ModelNotFoundError, ProviderAuthError as ProviderAuthError, ProviderConflictError as ProviderConflictError, ProviderInternalError as ProviderInternalError, ProviderInvalidArgsError as ProviderInvalidArgsError, ProviderOverloadedError as ProviderOverloadedError, ProviderRateLimitError as ProviderRateLimitError
+
+class ExtendedHTTPStatus(IntEnum):
+    """HTTP status codes outside of the standard HTTPStatus enum.
+
+    Attributes:
+        SERVICE_OVERLOADED (int): HTTP status code for service overloaded.
+    """
+    SERVICE_OVERLOADED = 529
+
+HTTP_STATUS_TO_EXCEPTION_MAP: dict[int, type[BaseInvokerError]]
+ANTHROPIC_ERROR_MAPPING: Incomplete
+BEDROCK_ERROR_MAPPING: Incomplete
+GOOGLE_ERROR_MAPPING: Incomplete
+LANGCHAIN_ERROR_CODE_MAPPING: Incomplete
+LITELLM_ERROR_MAPPING: Incomplete
+OPENAI_ERROR_MAPPING: Incomplete
+TWELVELABS_ERROR_MAPPING: Incomplete
+VOYAGE_ERROR_MAPPING: Incomplete
+GRPC_STATUS_CODE_MAPPING: Incomplete
+ALL_PROVIDER_ERROR_MAPPINGS: Incomplete
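`HTTP_STATUS_TO_EXCEPTION_MAP` and `ExtendedHTTPStatus` move here from error_parser.pyi, and the per-provider mappings centralize error classification. A hedged lookup sketch; the stub does not show which statuses map to which classes, so the fallback choice below is an assumption consistent with the exception docstrings:

```python
# Hedged sketch: resolving a status code through the relocated map.
from gllm_inference.exceptions.exceptions import InvokerRuntimeError
from gllm_inference.exceptions.provider_error_map import HTTP_STATUS_TO_EXCEPTION_MAP


def exception_class_for(status_code: int):
    # Unmapped codes fall back to the generic runtime error (assumption).
    return HTTP_STATUS_TO_EXCEPTION_MAP.get(status_code, InvokerRuntimeError)
```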
gllm_inference/lm_invoker/__init__.pyi

@@ -7,5 +7,6 @@ from gllm_inference.lm_invoker.langchain_lm_invoker import LangChainLMInvoker as
 from gllm_inference.lm_invoker.litellm_lm_invoker import LiteLLMLMInvoker as LiteLLMLMInvoker
 from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker as OpenAICompatibleLMInvoker
 from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker
+from gllm_inference.lm_invoker.xai_lm_invoker import XAILMInvoker as XAILMInvoker
 
-__all__ = ['AnthropicLMInvoker', 'AzureOpenAILMInvoker', 'BedrockLMInvoker', 'DatasaurLMInvoker', 'GoogleLMInvoker', 'LangChainLMInvoker', 'LiteLLMLMInvoker', 'OpenAICompatibleLMInvoker', 'OpenAILMInvoker']
+__all__ = ['AnthropicLMInvoker', 'AzureOpenAILMInvoker', 'BedrockLMInvoker', 'DatasaurLMInvoker', 'GoogleLMInvoker', 'LangChainLMInvoker', 'LiteLLMLMInvoker', 'OpenAICompatibleLMInvoker', 'OpenAILMInvoker', 'XAILMInvoker']
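`XAILMInvoker` becomes the tenth exported invoker. A quick import check; its constructor lives in xai_lm_invoker.pyi, which this diff adds but does not display, so no arguments are assumed:

```python
# Hedged sketch: the new invoker is reachable from the subpackage root.
from gllm_inference import lm_invoker
from gllm_inference.lm_invoker import XAILMInvoker

print(lm_invoker.__all__)     # [..., 'OpenAILMInvoker', 'XAILMInvoker']
print(XAILMInvoker.__name__)  # 'XAILMInvoker'
```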
gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi

@@ -1,7 +1,7 @@
 from _typeshed import Incomplete
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
-from gllm_inference.constants import DEFAULT_AZURE_OPENAI_API_VERSION as DEFAULT_AZURE_OPENAI_API_VERSION, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.constants import AZURE_OPENAI_URL_SUFFIX as AZURE_OPENAI_URL_SUFFIX, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
 from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
 from gllm_inference.lm_invoker.schema.openai import Key as Key
 from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema

@@ -33,7 +33,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
     The `AzureOpenAILMInvoker` can be used as follows:
     ```python
     lm_invoker = AzureOpenAILMInvoker(
-        azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/",
+        azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
         azure_deployment="<your-azure-openai-deployment>",
     )
     result = await lm_invoker.invoke("Hi there!")

@@ -158,17 +158,17 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
     ```
 
     Reasoning:
-        Azure OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
-        producing a long internal chain of thought before responding to the user. Reasoning models excel in
-        complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
+        Azure OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think
+        before they answer, producing a long internal chain of thought before responding to the user. Reasoning models
+        excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
 
         The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
-        will guide the models on how many reasoning tokens it should generate before creating a response to the prompt.
+        will guide the models on how many reasoning tokens it should generate before creating a response.
         Available options include:
-        1. "low": Favors speed and economical token usage.
-        2. "medium": Favors a balance between speed and reasoning accuracy.
-        3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
-        When not set, the reasoning effort will be equivalent to `medium` by default.
+        1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
+        2. "low": Favors speed and economical token usage.
+        3. "medium": Favors a balance between speed and reasoning accuracy.
+        4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
 
         Azure OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
         generated. The summary level can be set via the `reasoning_summary` parameter. Available options include:

@@ -220,7 +220,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             Defaults to an empty list.
     '''
     client: Incomplete
-    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
+    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
         """Initializes a new instance of the AzureOpenAILMInvoker class.
 
         Args:

@@ -228,8 +228,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
             azure_deployment (str): The deployment name of the Azure OpenAI service.
             api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
                 which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
-            api_version (str, optional): The API version of the Azure OpenAI service. Defaults to
-                `DEFAULT_AZURE_OPENAI_API_VERSION`.
+            api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
             model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
             default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
                 Defaults to None.
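Two behavioral notes fall out of these hunks: the documented endpoint now carries the `/openai/v1` suffix, and `api_version` is deprecated ahead of removal in v0.6. A hedged construction sketch using only parameters shown in the signature above; the endpoint and deployment values are placeholders:

```python
# Hedged sketch: constructing the invoker per the updated docstring.
import asyncio

from gllm_inference.lm_invoker import AzureOpenAILMInvoker
from gllm_inference.lm_invoker.openai_lm_invoker import ReasoningEffort


async def main():
    lm_invoker = AzureOpenAILMInvoker(
        azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
        azure_deployment="<your-azure-openai-deployment>",
        # api_version is deprecated and defaults to None; omit it.
        reasoning_effort=ReasoningEffort.MINIMAL,  # GPT-5 models onwards only
    )
    result = await lm_invoker.invoke("Hi there!")
    print(result)


asyncio.run(main())
```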
gllm_inference/lm_invoker/bedrock_lm_invoker.pyi

@@ -2,6 +2,8 @@ from _typeshed import Incomplete
 from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import BEDROCK_ERROR_MAPPING as BEDROCK_ERROR_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.bedrock import InputType as InputType, Key as Key, OutputType as OutputType
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, LMOutput as LMOutput, Message as Message, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
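These imports wire the Bedrock invoker into the shared fallback conversion described in error_parser.pyi. A hedged sketch of the call shape; the stub only declares the imports, so how the invoker uses the helper internally is an assumption:

```python
# Hedged sketch: status-code-first conversion with a Bedrock-specific fallback.
from gllm_inference.exceptions import convert_http_status_to_base_invoker_error
from gllm_inference.exceptions.provider_error_map import BEDROCK_ERROR_MAPPING


def normalize_bedrock_error(error, invoker):
    # Tries HTTP status extraction first, then the provider error mapping.
    return convert_http_status_to_base_invoker_error(
        error, invoker, provider_error_mapping=BEDROCK_ERROR_MAPPING
    )
```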
gllm_inference/lm_invoker/google_lm_invoker.pyi

@@ -2,7 +2,9 @@ from _typeshed import Incomplete
 from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool
 from gllm_core.utils.retry import RetryConfig as RetryConfig
-from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES
+from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES, SECONDS_TO_MILLISECONDS as SECONDS_TO_MILLISECONDS
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import GOOGLE_ERROR_MAPPING as GOOGLE_ERROR_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.google import InputType as InputType, Key as Key
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, Reasoning as Reasoning, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult

@@ -162,7 +164,12 @@ class GoogleLMInvoker(BaseLMInvoker):
     ```python
     LMOutput(
         response="Golden retriever is a good dog breed.",
-        token_usage=TokenUsage(input_tokens=100, output_tokens=50),
+        token_usage=TokenUsage(
+            input_tokens=1500,
+            output_tokens=200,
+            input_token_details=InputTokenDetails(cached_tokens=1200, uncached_tokens=300),
+            output_token_details=OutputTokenDetails(reasoning_tokens=180, response_tokens=20),
+        ),
         duration=0.729,
         finish_details={"finish_reason": "STOP", "finish_message": None},
     )
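The updated `LMOutput` example documents per-category token accounting. A hedged sketch of reading it back; the attribute names follow the docstring above, and the None-guards are an assumption about the detail fields being optional:

```python
# Hedged sketch: inspecting the expanded TokenUsage on an invocation result.
def report_token_usage(result) -> None:
    """Print token details from an LMOutput returned by invoke()."""
    usage = result.token_usage
    print("input:", usage.input_tokens, "output:", usage.output_tokens)
    if usage.input_token_details is not None:
        print("cached:", usage.input_token_details.cached_tokens)
        print("uncached:", usage.input_token_details.uncached_tokens)
    if usage.output_token_details is not None:
        print("reasoning:", usage.output_token_details.reasoning_tokens)
        print("response:", usage.output_token_details.response_tokens)
```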
gllm_inference/lm_invoker/langchain_lm_invoker.pyi

@@ -3,6 +3,8 @@ from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool as Tool
 from gllm_core.utils.retry import RetryConfig
 from gllm_inference.constants import INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, build_debug_info as build_debug_info
+from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, LANGCHAIN_ERROR_CODE_MAPPING as LANGCHAIN_ERROR_CODE_MAPPING
 from gllm_inference.lm_invoker.lm_invoker import BaseLMInvoker as BaseLMInvoker
 from gllm_inference.lm_invoker.schema.langchain import InputType as InputType, Key as Key
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, LMOutput as LMOutput, Message as Message, MessageRole as MessageRole, ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema, TokenUsage as TokenUsage, ToolCall as ToolCall, ToolResult as ToolResult
gllm_inference/lm_invoker/lm_invoker.pyi

@@ -5,29 +5,30 @@ from gllm_core.event import EventEmitter as EventEmitter
 from gllm_core.schema.tool import Tool
 from gllm_core.utils import RetryConfig
 from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
-from gllm_inference.exceptions import parse_error_message as parse_error_message
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
 from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EmitDataType as EmitDataType, LMOutput as LMOutput, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole, ModelId as ModelId, Reasoning as Reasoning, ResponseSchema as ResponseSchema, ToolCall as ToolCall, ToolResult as ToolResult
 from langchain_core.tools import Tool as LangChainTool
 from typing import Any
 
-class _Key:
+class Key:
     """Defines valid keys in LM invokers JSON schema."""
     ADDITIONAL_PROPERTIES: str
     ANY_OF: str
+    ARGS_SCHEMA: str
+    ARUN: str
+    COROUTINE: str
     DATA_TYPE: str
     DATA_VALUE: str
     DEFAULT: str
     DESCRIPTION: str
-    FUNCTION: str
-    META: str
+    FUNC: str
     NAME: str
-    PARAMETERS: str
     PROPERTIES: str
     REQUIRED: str
     TITLE: str
     TYPE: str
 
-class _InputType:
+class InputType:
     """Defines valid input types in LM invokers JSON schema."""
     NULL: str
 
gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi

@@ -52,7 +52,7 @@ class OpenAICompatibleLMInvoker(BaseLMInvoker):
     ```
 
     Input types:
-        The `OpenAICompatibleLMInvoker` supports the following input types: text, audio, and image.
+        The `OpenAICompatibleLMInvoker` supports the following input types: text, audio, document, and image.
         Non-text inputs can be passed as an `Attachment` object with the `user` role.
 
     Usage example:
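Document input is new here; it pairs with the FILE/FILE_DATA/FILENAME keys added to schema/openai_compatible.pyi below. A heavily hedged sketch: this diff does not show how `Attachment` is constructed, so `Attachment.from_path` and the list-style prompt are stand-ins for illustration only:

```python
# Hedged sketch: sending a document to an OpenAI-compatible endpoint.
# Attachment.from_path is NOT confirmed by this diff; it is a placeholder
# for however gllm_inference.schema.Attachment is actually built.
from gllm_inference.schema import Attachment


async def summarize(lm_invoker):
    document = Attachment.from_path("report.pdf")  # hypothetical constructor
    return await lm_invoker.invoke(["Summarize this report.", document])
```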
gllm_inference/lm_invoker/openai_lm_invoker.pyi

@@ -159,16 +159,17 @@ class OpenAILMInvoker(BaseLMInvoker):
     ```
 
     Reasoning:
-        OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
-        producing a long internal chain of thought before responding to the user. Reasoning models excel in
-        complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
+        OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think before
+        they answer, producing a long internal chain of thought before responding to the user. Reasoning models
+        excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
 
         The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
         will guide the models on how many reasoning tokens it should generate before creating a response.
         Available options include:
-        1. "low": Favors speed and economical token usage.
-        2. "medium": Favors a balance between speed and reasoning accuracy.
-        3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
+        1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
+        2. "low": Favors speed and economical token usage.
+        3. "medium": Favors a balance between speed and reasoning accuracy.
+        4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
         When not set, the reasoning effort will be equivalent to `medium` by default.
 
         OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
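The same "minimal" option lands in schema/openai.pyi below as `ReasoningEffort.MINIMAL`. Since `ReasoningEffort` is a `StrEnum`, the member and the raw string should be interchangeable wherever `reasoning_effort` is accepted:

```python
# Hedged sketch: the new enum member, usable as a plain string.
from gllm_inference.lm_invoker.schema.openai import ReasoningEffort

effort = ReasoningEffort.MINIMAL
assert effort == "minimal"  # StrEnum members compare equal to their values
```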
gllm_inference/lm_invoker/schema/bedrock.pyi

@@ -5,8 +5,11 @@ class Key:
     CONTENT_BLOCK_INDEX: str
     DELTA: str
     DESCRIPTION: str
+    ERROR: str
+    CODE: str
     FORMAT: str
     FUNCTION: str
+    HTTP_STATUS_CODE: str
     INFERENCE_CONFIG: str
     INPUT: str
     INPUT_SCHEMA: str

@@ -14,9 +17,11 @@ class Key:
     JSON: str
     MESSAGE: str
     NAME: str
+    RESPONSE: str
     OUTPUT: str
     OUTPUT_TOKENS: str
     PARAMETERS: str
+    RESPONSE_METADATA: str
     ROLE: str
     SOURCE: str
     START: str
gllm_inference/lm_invoker/schema/langchain.pyi

@@ -1,6 +1,7 @@
 class Key:
     """Defines valid keys in LangChain."""
     ARGS: str
+    ERROR_CODE: str
     FINISH_REASON: str
     ID: str
     IMAGE_URL: str
gllm_inference/lm_invoker/schema/openai.pyi

@@ -83,6 +83,7 @@ class ReasoningEffort(StrEnum):
     HIGH = 'high'
     MEDIUM = 'medium'
     LOW = 'low'
+    MINIMAL = 'minimal'
 
 class ReasoningSummary(StrEnum):
     """Defines the reasoning summary for reasoning models."""
gllm_inference/lm_invoker/schema/openai_compatible.pyi

@@ -9,6 +9,9 @@ class Key:
     DEFS: str
     DESCRIPTION: str
     EFFORT: str
+    FILE: str
+    FILE_DATA: str
+    FILENAME: str
     FINISH_REASON: str
     FORMAT: str
     FUNCTION: str

@@ -40,6 +43,7 @@ class Key:
 
 class InputType:
     """Defines valid input types in OpenAI compatible models."""
+    FILE: str
     FUNCTION: str
     IMAGE_URL: str
     INPUT_AUDIO: str
gllm_inference/lm_invoker/schema/xai.pyi (new file)

@@ -0,0 +1,31 @@
+from enum import StrEnum
+
+class Key:
+    """Defines valid keys in xAI."""
+    ARGUMENTS: str
+    CHANNEL_OPTIONS: str
+    CITATIONS: str
+    COMPLETION_TOKENS: str
+    CONTENT: str
+    FINISH_REASON: str
+    FUNCTION: str
+    ID: str
+    NAME: str
+    ON: str
+    PROMPT_TOKENS: str
+    REASONING_CONTENT: str
+    REASONING_EFFORT: str
+    RESPONSE_FORMAT: str
+    SEARCH_PARAMETERS: str
+    TIMEOUT: str
+    TOOL_CALLS: str
+    TOOLS: str
+    TYPE: str
+    URL: str
+    URL_CITATION: str
+    USAGE: str
+
+class ReasoningEffort(StrEnum):
+    """Defines the reasoning effort for reasoning models."""
+    HIGH = 'high'
+    LOW = 'low'
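Note that this xAI enum defines only `HIGH` and `LOW`; unlike the OpenAI `ReasoningEffort` above, there is no `MEDIUM` or `MINIMAL`. A hedged sanity check:

```python
# Hedged sketch: the xAI effort levels are a strict subset of OpenAI's.
from gllm_inference.lm_invoker.schema.xai import ReasoningEffort

assert {effort.value for effort in ReasoningEffort} == {"high", "low"}
```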