gllm-inference-binary 0.5.19__cp311-cp311-win_amd64.whl → 0.5.21__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-inference-binary has been flagged as potentially problematic; consult the package registry's advisory page for more details.

@@ -107,7 +107,7 @@ def build_lm_invoker(model_id: str | ModelId, credentials: str | dict[str, Any]
107
107
  # Using Azure OpenAI
108
108
  ```python
109
109
  lm_invoker = build_lm_invoker(
110
- model_id="azure-openai/https://my-resource.openai.azure.com:my-deployment",
110
+ model_id="azure-openai/https://my-resource.openai.azure.com/openai/v1:my-deployment",
111
111
  credentials="azure-api-key"
112
112
  )
113
113
  ```
@@ -1,13 +1,15 @@
1
1
  from _typeshed import Incomplete
2
2
  from gllm_core.schema.tool import Tool as Tool
3
3
  from gllm_core.utils.retry import RetryConfig as RetryConfig
4
- from gllm_inference.constants import DEFAULT_AZURE_OPENAI_API_VERSION as DEFAULT_AZURE_OPENAI_API_VERSION, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
4
+ from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
5
5
  from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
6
6
  from gllm_inference.lm_invoker.schema.openai import Key as Key
7
7
  from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema
8
8
  from langchain_core.tools import Tool as LangChainTool
9
9
  from typing import Any
10
10
 
11
+ URL_SUFFIX: str
12
+
11
13
  class AzureOpenAILMInvoker(OpenAILMInvoker):
12
14
  '''A language model invoker to interact with Azure OpenAI language models.
13
15
 
@@ -33,7 +35,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
33
35
  The `AzureOpenAILMInvoker` can be used as follows:
34
36
  ```python
35
37
  lm_invoker = AzureOpenAILMInvoker(
36
- azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/",
38
+ azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
37
39
  azure_deployment="<your-azure-openai-deployment>",
38
40
  )
39
41
  result = await lm_invoker.invoke("Hi there!")
@@ -158,17 +160,17 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
158
160
  ```
159
161
 
160
162
  Reasoning:
161
- Azure OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
162
- producing a long internal chain of thought before responding to the user. Reasoning models excel in
163
- complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
163
+ Azure OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think
164
+ before they answer, producing a long internal chain of thought before responding to the user. Reasoning models
165
+ excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
164
166
 
165
167
  The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
166
- will guide the models on how many reasoning tokens it should generate before creating a response to the prompt.
168
+ will guide the models on how many reasoning tokens it should generate before creating a response.
167
169
  Available options include:
168
- 1. "low": Favors speed and economical token usage.
169
- 2. "medium": Favors a balance between speed and reasoning accuracy.
170
- 3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
171
- When not set, the reasoning effort will be equivalent to `medium` by default.
170
+ 1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
171
+ 2. "low": Favors speed and economical token usage.
172
+ 3. "medium": Favors a balance between speed and reasoning accuracy.
173
+ 4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
172
174
 
173
175
  Azure OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
174
176
  generated. The summary level can be set via the `reasoning_summary` parameter. Available options include:
@@ -220,7 +222,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
220
222
  Defaults to an empty list.
221
223
  '''
222
224
  client: Incomplete
223
- def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
225
+ def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
224
226
  """Initializes a new instance of the AzureOpenAILMInvoker class.
225
227
 
226
228
  Args:
@@ -228,8 +230,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
228
230
  azure_deployment (str): The deployment name of the Azure OpenAI service.
229
231
  api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
230
232
  which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
231
- api_version (str, optional): The API version of the Azure OpenAI service. Defaults to
232
- `DEFAULT_AZURE_OPENAI_API_VERSION`.
233
+ api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
233
234
  model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
234
235
  default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
235
236
  Defaults to None.
@@ -159,16 +159,17 @@ class OpenAILMInvoker(BaseLMInvoker):
159
159
  ```
160
160
 
161
161
  Reasoning:
162
- OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
163
- producing a long internal chain of thought before responding to the user. Reasoning models excel in
164
- complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
162
+ OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think before
163
+ they answer, producing a long internal chain of thought before responding to the user. Reasoning models
164
+ excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
165
165
 
166
166
  The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
167
167
  will guide the models on how many reasoning tokens it should generate before creating a response.
168
168
  Available options include:
169
- 1. "low": Favors speed and economical token usage.
170
- 2. "medium": Favors a balance between speed and reasoning accuracy.
171
- 3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
169
+ 1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
170
+ 2. "low": Favors speed and economical token usage.
171
+ 3. "medium": Favors a balance between speed and reasoning accuracy.
172
+ 4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
172
173
  When not set, the reasoning effort will be equivalent to `medium` by default.
173
174
 
174
175
  OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
@@ -83,6 +83,7 @@ class ReasoningEffort(StrEnum):
83
83
  HIGH = 'high'
84
84
  MEDIUM = 'medium'
85
85
  LOW = 'low'
86
+ MINIMAL = 'minimal'
86
87
 
87
88
  class ReasoningSummary(StrEnum):
88
89
  """Defines the reasoning summary for reasoning models."""
@@ -57,7 +57,7 @@ class ModelId(BaseModel):
57
57
 
58
58
  # Using Azure OpenAI
59
59
  ```python
60
- model_id = ModelId.from_string("azure-openai/https://my-resource.openai.azure.com:my-deployment")
60
+ model_id = ModelId.from_string("azure-openai/https://my-resource.openai.azure.com/openai/v1:my-deployment")
61
61
  ```
62
62
 
63
63
  # Using OpenAI compatible endpoints (e.g. Groq)
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gllm-inference-binary
3
- Version: 0.5.19
3
+ Version: 0.5.21
4
4
  Summary: A library containing components related to model inferences in Gen AI applications.
5
5
  Author: Henry Wicaksono
6
6
  Author-email: henry.wicaksono@gdplabs.id
@@ -1,7 +1,7 @@
1
1
  gllm_inference/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  gllm_inference/builder/__init__.pyi,sha256=-bw1uDx7CAM7pkvjvb1ZXku9zXlQ7aEAyC83KIn3bz8,506
3
3
  gllm_inference/builder/build_em_invoker.pyi,sha256=cBqifw0frhYZinDndeCjqDVqv7oeW728-i5nH4JMyhk,6010
4
- gllm_inference/builder/build_lm_invoker.pyi,sha256=uVHupv0yAP8WGUqu1fTSSrvIV9KK8UvYydXI-hMCqlU,7037
4
+ gllm_inference/builder/build_lm_invoker.pyi,sha256=p63iuVBOOpNizItGK6HDxYDrgXdovtfSe0VrvrEd-PA,7047
5
5
  gllm_inference/builder/build_lm_request_processor.pyi,sha256=0pJINCP4nnXVwuhIbhsaiwzjX8gohQt2oqXFZhTFSUs,4584
6
6
  gllm_inference/builder/build_output_parser.pyi,sha256=sgSTrzUmSRxPzUUum0fDU7A3NXYoYhpi6bEx4Q2XMnA,965
7
7
  gllm_inference/catalog/__init__.pyi,sha256=HWgPKWIzprpMHRKe_qN9BZSIQhVhrqiyjLjIXwvj1ho,291
@@ -34,7 +34,7 @@ gllm_inference/exceptions/error_parser.pyi,sha256=ggmh8DJXdwFJInNLrP24WVJt_4raxb
34
34
  gllm_inference/exceptions/exceptions.pyi,sha256=ViXvIzm7tLcstjqfwC6nPziDg0UAmoUAWZVWrAJyp3w,4763
35
35
  gllm_inference/lm_invoker/__init__.pyi,sha256=eE_HDCl9A135mi6mtIV55q-T9J1O8OpbMcqWuny3w9A,1214
36
36
  gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=85uvShLv4-eiGOpTMgwWpQGZXPW6XaB6GrexBmxg_sQ,15200
37
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=N2TjGz5Gi6xiLkAgI6SzWq_V3tj66HJfMNff7d04uU0,14856
37
+ gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=Ek7pZYaSWWFdPKI6iPKiICvZxN7xCVglQleTSSwW5ok,14799
38
38
  gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=ae5P_9sjtcOgMIUaRchvp8F0FujoeP4e2F_OoHSe_go,12655
39
39
  gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=c4H3TOz0LIhWjokCCdQ4asiwQR4_LPyaimo4RAqU9es,9369
40
40
  gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=I3plg_oVuTl0hiShFBmCYPclP4gWbzU61xUSgon24Ew,17102
@@ -42,14 +42,14 @@ gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=bBGOxJfjnzOtDR4kH4PuCi
42
42
  gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=HHwW7i8ryXHI23JZQwscyva6aPmPOB13Muhf7gaaMUM,13376
43
43
  gllm_inference/lm_invoker/lm_invoker.pyi,sha256=Sd-ywxgPcIzyI5eA7XoqdkYG9hntEnihJfj6Ack7qr0,7975
44
44
  gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=JemahodhaUsC2gsI7YSxnW4X3uX1cU4YCFdIvdWWY88,15203
45
- gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=SEHWAwpT8KmIQukurXtXOU2xyU2rp_HtM2SARsBF3dU,19892
45
+ gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=VFMvYXuwMuUHarsu5Xz7tKF6Bx6Ket5HaXZ4-7AtBY0,20011
46
46
  gllm_inference/lm_invoker/schema/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=lGJ7xYLchdtv6003Is4GcaKiGdbmIOAzQsaldKG0Aww,1041
48
48
  gllm_inference/lm_invoker/schema/bedrock.pyi,sha256=H3attoGWhBA725W4FpXw7Mty46N9jHKjw9PT-0lMEJs,975
49
49
  gllm_inference/lm_invoker/schema/datasaur.pyi,sha256=GLv6XAwKtWyRrX6EsbEufYjkPffHNiEpXwJOn9HqxMA,242
50
50
  gllm_inference/lm_invoker/schema/google.pyi,sha256=elXHrUMS46pbTsulk7hBXVVFcT022iD-_U_I590xeV8,529
51
51
  gllm_inference/lm_invoker/schema/langchain.pyi,sha256=uEG0DSD0z4L_rDMkBm-TtUy5oTyEHEEJWiLsYvFf1sw,431
52
- gllm_inference/lm_invoker/schema/openai.pyi,sha256=Cxp5QMkF6lspcVUgCNZR1qDK43Fj6OoEdOiQ1x5arsQ,1992
52
+ gllm_inference/lm_invoker/schema/openai.pyi,sha256=2KZkitU0jxFaR6x2AGe1FtawvxtUgTLDffY9T0Iq9yg,2017
53
53
  gllm_inference/lm_invoker/schema/openai_compatible.pyi,sha256=fVLRIrOvLJjhY7qPUgC3HRFoOFa7XimWLjr2EOo5qmQ,1226
54
54
  gllm_inference/lm_invoker/schema/xai.pyi,sha256=jpC6ZSBDUltzm9GjD6zvSFIPwqizn_ywLnjvwSa7KuU,663
55
55
  gllm_inference/lm_invoker/xai_lm_invoker.pyi,sha256=61Jihta4Mm1SZ72aGmXNPFW2g2_TJx7BxjfY_jugvVY,15723
@@ -85,7 +85,7 @@ gllm_inference/schema/config.pyi,sha256=NVmjQK6HipIE0dKSfx12hgIC0O-S1HEcAc-TWlXA
85
85
  gllm_inference/schema/enums.pyi,sha256=XmvxE7A-A8bX6hTikiAo_v66Z3hjMvhJGau1OUy9QDk,746
86
86
  gllm_inference/schema/lm_output.pyi,sha256=WP2LQrY0D03OJtFoaW_dGoJ_-yFUh2HbVlllgjzpYv4,1992
87
87
  gllm_inference/schema/message.pyi,sha256=jJV6A0ihEcun2OhzyMtNkiHnf7d6v5R-GdpTBGfJ0AQ,2272
88
- gllm_inference/schema/model_id.pyi,sha256=BIteIsEM19VIj_6wBkwKl_xd_iUpe21C7FIKh5BRC5I,5628
88
+ gllm_inference/schema/model_id.pyi,sha256=h2nAmYgUYjF8MjT9pTnRfrevYuSHeksEZHvizkmu6n8,5638
89
89
  gllm_inference/schema/reasoning.pyi,sha256=jbPxkDRHt0Vt-zdcc8lTT1l2hIE1Jm3HIHeNd0hfXGo,577
90
90
  gllm_inference/schema/token_usage.pyi,sha256=WJiGQyz5qatzBK2b-sABLCyTRLCBbAvxCRcqSJOzu-8,3025
91
91
  gllm_inference/schema/tool_call.pyi,sha256=OWT9LUqs_xfUcOkPG0aokAAqzLYYDkfnjTa0zOWvugk,403
@@ -96,8 +96,8 @@ gllm_inference/utils/io_utils.pyi,sha256=Eg7dvHWdXslTKdjh1j3dG50i7r35XG2zTmJ9XXv
96
96
  gllm_inference/utils/langchain.pyi,sha256=4AwFiVAO0ZpdgmqeC4Pb5NJwBt8vVr0MSUqLeCdTscc,1194
97
97
  gllm_inference/utils/validation.pyi,sha256=-RdMmb8afH7F7q4Ao7x6FbwaDfxUHn3hA3WiOgzB-3s,397
98
98
  gllm_inference.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
99
- gllm_inference.cp311-win_amd64.pyd,sha256=uu47XUC33wnxuf_SstBjHY70lXPvq85l8B2a7I6C2Oo,2987520
99
+ gllm_inference.cp311-win_amd64.pyd,sha256=Q-qE6Y9Yi694RPyvECZXGHBvp0ZKuE1dJh0z3zVqfho,2991104
100
100
  gllm_inference.pyi,sha256=lTVixRzlC12Joi4kW_vxnux0rLHAUB_3j7RMFOwLK-M,3616
101
- gllm_inference_binary-0.5.19.dist-info/METADATA,sha256=RYPC2mk8-uXt5cFOZh7VdB6ccfFPjZ7AjvSsCoCfZQI,4608
102
- gllm_inference_binary-0.5.19.dist-info/WHEEL,sha256=-FZBVKyKauScY3vLa8vJR6hBCpAJfFykw2MOwlNKr1g,98
103
- gllm_inference_binary-0.5.19.dist-info/RECORD,,
101
+ gllm_inference_binary-0.5.21.dist-info/METADATA,sha256=WQpwvWOgWHZ_SiRhj_yvS1h1AjLMKzyJ6QQTfvimSEw,4608
102
+ gllm_inference_binary-0.5.21.dist-info/WHEEL,sha256=-FZBVKyKauScY3vLa8vJR6hBCpAJfFykw2MOwlNKr1g,98
103
+ gllm_inference_binary-0.5.21.dist-info/RECORD,,