gllm-inference-binary 0.5.19__cp311-cp311-win_amd64.whl → 0.5.21__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-inference-binary has been flagged as potentially problematic; consult the package registry's advisory page for more details.

@@ -107,7 +107,7 @@ def build_lm_invoker(model_id: str | ModelId, credentials: str | dict[str, Any]
107
107
  # Using Azure OpenAI
108
108
  ```python
109
109
  lm_invoker = build_lm_invoker(
110
- model_id="azure-openai/https://my-resource.openai.azure.com:my-deployment",
110
+ model_id="azure-openai/https://my-resource.openai.azure.com/openai/v1:my-deployment",
111
111
  credentials="azure-api-key"
112
112
  )
113
113
  ```
@@ -1,13 +1,15 @@
1
1
  from _typeshed import Incomplete
2
2
  from gllm_core.schema.tool import Tool as Tool
3
3
  from gllm_core.utils.retry import RetryConfig as RetryConfig
4
- from gllm_inference.constants import DEFAULT_AZURE_OPENAI_API_VERSION as DEFAULT_AZURE_OPENAI_API_VERSION, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
4
+ from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
5
5
  from gllm_inference.lm_invoker.openai_lm_invoker import OpenAILMInvoker as OpenAILMInvoker, ReasoningEffort as ReasoningEffort, ReasoningSummary as ReasoningSummary
6
6
  from gllm_inference.lm_invoker.schema.openai import Key as Key
7
7
  from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, ResponseSchema as ResponseSchema
8
8
  from langchain_core.tools import Tool as LangChainTool
9
9
  from typing import Any
10
10
 
11
+ URL_SUFFIX: str
12
+
11
13
  class AzureOpenAILMInvoker(OpenAILMInvoker):
12
14
  '''A language model invoker to interact with Azure OpenAI language models.
13
15
 
@@ -33,7 +35,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
33
35
  The `AzureOpenAILMInvoker` can be used as follows:
34
36
  ```python
35
37
  lm_invoker = AzureOpenAILMInvoker(
36
- azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/",
38
+ azure_endpoint="https://<your-azure-openai-endpoint>.openai.azure.com/openai/v1",
37
39
  azure_deployment="<your-azure-openai-deployment>",
38
40
  )
39
41
  result = await lm_invoker.invoke("Hi there!")
@@ -158,17 +160,17 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
158
160
  ```
159
161
 
160
162
  Reasoning:
161
- Azure OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
162
- producing a long internal chain of thought before responding to the user. Reasoning models excel in
163
- complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
163
+ Azure OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think
164
+ before they answer, producing a long internal chain of thought before responding to the user. Reasoning models
165
+ excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
164
166
 
165
167
  The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
166
- will guide the models on how many reasoning tokens it should generate before creating a response to the prompt.
168
+ will guide the models on how many reasoning tokens it should generate before creating a response.
167
169
  Available options include:
168
- 1. "low": Favors speed and economical token usage.
169
- 2. "medium": Favors a balance between speed and reasoning accuracy.
170
- 3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
171
- When not set, the reasoning effort will be equivalent to `medium` by default.
170
+ 1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
171
+ 2. "low": Favors speed and economical token usage.
172
+ 3. "medium": Favors a balance between speed and reasoning accuracy.
173
+ 4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
172
174
 
173
175
  Azure OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
174
176
  generated. The summary level can be set via the `reasoning_summary` parameter. Available options include:
@@ -220,7 +222,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
220
222
  Defaults to an empty list.
221
223
  '''
222
224
  client: Incomplete
223
- def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
225
+ def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, tools: list[Tool | LangChainTool] | None = None, response_schema: ResponseSchema | None = None, output_analytics: bool = False, retry_config: RetryConfig | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_summary: ReasoningSummary | None = None) -> None:
224
226
  """Initializes a new instance of the AzureOpenAILMInvoker class.
225
227
 
226
228
  Args:
@@ -228,8 +230,7 @@ class AzureOpenAILMInvoker(OpenAILMInvoker):
228
230
  azure_deployment (str): The deployment name of the Azure OpenAI service.
229
231
  api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
230
232
  which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
231
- api_version (str, optional): The API version of the Azure OpenAI service. Defaults to
232
- `DEFAULT_AZURE_OPENAI_API_VERSION`.
233
+ api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
233
234
  model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
234
235
  default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
235
236
  Defaults to None.
@@ -159,16 +159,17 @@ class OpenAILMInvoker(BaseLMInvoker):
159
159
  ```
160
160
 
161
161
  Reasoning:
162
- OpenAI\'s o-series models are classified as reasoning models. Reasoning models think before they answer,
163
- producing a long internal chain of thought before responding to the user. Reasoning models excel in
164
- complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
162
+ OpenAI\'s GPT-5 models and o-series models are classified as reasoning models. Reasoning models think before
163
+ they answer, producing a long internal chain of thought before responding to the user. Reasoning models
164
+ excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows.
165
165
 
166
166
  The reasoning effort of reasoning models can be set via the `reasoning_effort` parameter. This parameter
167
167
  will guide the models on how many reasoning tokens it should generate before creating a response.
168
168
  Available options include:
169
- 1. "low": Favors speed and economical token usage.
170
- 2. "medium": Favors a balance between speed and reasoning accuracy.
171
- 3. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
169
+ 1. "minimal": Favors the least amount of reasoning, only supported for GPT-5 models onwards.
170
+ 2. "low": Favors speed and economical token usage.
171
+ 3. "medium": Favors a balance between speed and reasoning accuracy.
172
+ 4. "high": Favors more complete reasoning at the cost of more tokens generated and slower responses.
172
173
  When not set, the reasoning effort will be equivalent to `medium` by default.
173
174
 
174
175
  OpenAI doesn\'t expose the raw reasoning tokens. However, the summary of the reasoning tokens can still be
@@ -83,6 +83,7 @@ class ReasoningEffort(StrEnum):
83
83
  HIGH = 'high'
84
84
  MEDIUM = 'medium'
85
85
  LOW = 'low'
86
+ MINIMAL = 'minimal'
86
87
 
87
88
  class ReasoningSummary(StrEnum):
88
89
  """Defines the reasoning summary for reasoning models."""
@@ -57,7 +57,7 @@ class ModelId(BaseModel):
57
57
 
58
58
  # Using Azure OpenAI
59
59
  ```python
60
- model_id = ModelId.from_string("azure-openai/https://my-resource.openai.azure.com:my-deployment")
60
+ model_id = ModelId.from_string("azure-openai/https://my-resource.openai.azure.com/openai/v1:my-deployment")
61
61
  ```
62
62
 
63
63
  # Using OpenAI compatible endpoints (e.g. Groq)
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gllm-inference-binary
3
- Version: 0.5.19
3
+ Version: 0.5.21
4
4
  Summary: A library containing components related to model inferences in Gen AI applications.
5
5
  Author: Henry Wicaksono
6
6
  Author-email: henry.wicaksono@gdplabs.id
@@ -1,7 +1,7 @@
1
1
  gllm_inference/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  gllm_inference/builder/__init__.pyi,sha256=-bw1uDx7CAM7pkvjvb1ZXku9zXlQ7aEAyC83KIn3bz8,506
3
3
  gllm_inference/builder/build_em_invoker.pyi,sha256=cBqifw0frhYZinDndeCjqDVqv7oeW728-i5nH4JMyhk,6010
4
- gllm_inference/builder/build_lm_invoker.pyi,sha256=uVHupv0yAP8WGUqu1fTSSrvIV9KK8UvYydXI-hMCqlU,7037
4
+ gllm_inference/builder/build_lm_invoker.pyi,sha256=p63iuVBOOpNizItGK6HDxYDrgXdovtfSe0VrvrEd-PA,7047
5
5
  gllm_inference/builder/build_lm_request_processor.pyi,sha256=0pJINCP4nnXVwuhIbhsaiwzjX8gohQt2oqXFZhTFSUs,4584
6
6
  gllm_inference/builder/build_output_parser.pyi,sha256=sgSTrzUmSRxPzUUum0fDU7A3NXYoYhpi6bEx4Q2XMnA,965
7
7
  gllm_inference/catalog/__init__.pyi,sha256=HWgPKWIzprpMHRKe_qN9BZSIQhVhrqiyjLjIXwvj1ho,291
@@ -34,7 +34,7 @@ gllm_inference/exceptions/error_parser.pyi,sha256=ggmh8DJXdwFJInNLrP24WVJt_4raxb
34
34
  gllm_inference/exceptions/exceptions.pyi,sha256=ViXvIzm7tLcstjqfwC6nPziDg0UAmoUAWZVWrAJyp3w,4763
35
35
  gllm_inference/lm_invoker/__init__.pyi,sha256=eE_HDCl9A135mi6mtIV55q-T9J1O8OpbMcqWuny3w9A,1214
36
36
  gllm_inference/lm_invoker/anthropic_lm_invoker.pyi,sha256=85uvShLv4-eiGOpTMgwWpQGZXPW6XaB6GrexBmxg_sQ,15200
37
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=N2TjGz5Gi6xiLkAgI6SzWq_V3tj66HJfMNff7d04uU0,14856
37
+ gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi,sha256=Ek7pZYaSWWFdPKI6iPKiICvZxN7xCVglQleTSSwW5ok,14799
38
38
  gllm_inference/lm_invoker/bedrock_lm_invoker.pyi,sha256=ae5P_9sjtcOgMIUaRchvp8F0FujoeP4e2F_OoHSe_go,12655
39
39
  gllm_inference/lm_invoker/datasaur_lm_invoker.pyi,sha256=c4H3TOz0LIhWjokCCdQ4asiwQR4_LPyaimo4RAqU9es,9369
40
40
  gllm_inference/lm_invoker/google_lm_invoker.pyi,sha256=I3plg_oVuTl0hiShFBmCYPclP4gWbzU61xUSgon24Ew,17102
@@ -42,14 +42,14 @@ gllm_inference/lm_invoker/langchain_lm_invoker.pyi,sha256=bBGOxJfjnzOtDR4kH4PuCi
42
42
  gllm_inference/lm_invoker/litellm_lm_invoker.pyi,sha256=HHwW7i8ryXHI23JZQwscyva6aPmPOB13Muhf7gaaMUM,13376
43
43
  gllm_inference/lm_invoker/lm_invoker.pyi,sha256=Sd-ywxgPcIzyI5eA7XoqdkYG9hntEnihJfj6Ack7qr0,7975
44
44
  gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi,sha256=JemahodhaUsC2gsI7YSxnW4X3uX1cU4YCFdIvdWWY88,15203
45
- gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=SEHWAwpT8KmIQukurXtXOU2xyU2rp_HtM2SARsBF3dU,19892
45
+ gllm_inference/lm_invoker/openai_lm_invoker.pyi,sha256=VFMvYXuwMuUHarsu5Xz7tKF6Bx6Ket5HaXZ4-7AtBY0,20011
46
46
  gllm_inference/lm_invoker/schema/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  gllm_inference/lm_invoker/schema/anthropic.pyi,sha256=lGJ7xYLchdtv6003Is4GcaKiGdbmIOAzQsaldKG0Aww,1041
48
48
  gllm_inference/lm_invoker/schema/bedrock.pyi,sha256=H3attoGWhBA725W4FpXw7Mty46N9jHKjw9PT-0lMEJs,975
49
49
  gllm_inference/lm_invoker/schema/datasaur.pyi,sha256=GLv6XAwKtWyRrX6EsbEufYjkPffHNiEpXwJOn9HqxMA,242
50
50
  gllm_inference/lm_invoker/schema/google.pyi,sha256=elXHrUMS46pbTsulk7hBXVVFcT022iD-_U_I590xeV8,529
51
51
  gllm_inference/lm_invoker/schema/langchain.pyi,sha256=uEG0DSD0z4L_rDMkBm-TtUy5oTyEHEEJWiLsYvFf1sw,431
52
- gllm_inference/lm_invoker/schema/openai.pyi,sha256=Cxp5QMkF6lspcVUgCNZR1qDK43Fj6OoEdOiQ1x5arsQ,1992
52
+ gllm_inference/lm_invoker/schema/openai.pyi,sha256=2KZkitU0jxFaR6x2AGe1FtawvxtUgTLDffY9T0Iq9yg,2017
53
53
  gllm_inference/lm_invoker/schema/openai_compatible.pyi,sha256=fVLRIrOvLJjhY7qPUgC3HRFoOFa7XimWLjr2EOo5qmQ,1226
54
54
  gllm_inference/lm_invoker/schema/xai.pyi,sha256=jpC6ZSBDUltzm9GjD6zvSFIPwqizn_ywLnjvwSa7KuU,663
55
55
  gllm_inference/lm_invoker/xai_lm_invoker.pyi,sha256=61Jihta4Mm1SZ72aGmXNPFW2g2_TJx7BxjfY_jugvVY,15723
@@ -85,7 +85,7 @@ gllm_inference/schema/config.pyi,sha256=NVmjQK6HipIE0dKSfx12hgIC0O-S1HEcAc-TWlXA
85
85
  gllm_inference/schema/enums.pyi,sha256=XmvxE7A-A8bX6hTikiAo_v66Z3hjMvhJGau1OUy9QDk,746
86
86
  gllm_inference/schema/lm_output.pyi,sha256=WP2LQrY0D03OJtFoaW_dGoJ_-yFUh2HbVlllgjzpYv4,1992
87
87
  gllm_inference/schema/message.pyi,sha256=jJV6A0ihEcun2OhzyMtNkiHnf7d6v5R-GdpTBGfJ0AQ,2272
88
- gllm_inference/schema/model_id.pyi,sha256=BIteIsEM19VIj_6wBkwKl_xd_iUpe21C7FIKh5BRC5I,5628
88
+ gllm_inference/schema/model_id.pyi,sha256=h2nAmYgUYjF8MjT9pTnRfrevYuSHeksEZHvizkmu6n8,5638
89
89
  gllm_inference/schema/reasoning.pyi,sha256=jbPxkDRHt0Vt-zdcc8lTT1l2hIE1Jm3HIHeNd0hfXGo,577
90
90
  gllm_inference/schema/token_usage.pyi,sha256=WJiGQyz5qatzBK2b-sABLCyTRLCBbAvxCRcqSJOzu-8,3025
91
91
  gllm_inference/schema/tool_call.pyi,sha256=OWT9LUqs_xfUcOkPG0aokAAqzLYYDkfnjTa0zOWvugk,403
@@ -96,8 +96,8 @@ gllm_inference/utils/io_utils.pyi,sha256=Eg7dvHWdXslTKdjh1j3dG50i7r35XG2zTmJ9XXv
96
96
  gllm_inference/utils/langchain.pyi,sha256=4AwFiVAO0ZpdgmqeC4Pb5NJwBt8vVr0MSUqLeCdTscc,1194
97
97
  gllm_inference/utils/validation.pyi,sha256=-RdMmb8afH7F7q4Ao7x6FbwaDfxUHn3hA3WiOgzB-3s,397
98
98
  gllm_inference.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
99
- gllm_inference.cp311-win_amd64.pyd,sha256=uu47XUC33wnxuf_SstBjHY70lXPvq85l8B2a7I6C2Oo,2987520
99
+ gllm_inference.cp311-win_amd64.pyd,sha256=Q-qE6Y9Yi694RPyvECZXGHBvp0ZKuE1dJh0z3zVqfho,2991104
100
100
  gllm_inference.pyi,sha256=lTVixRzlC12Joi4kW_vxnux0rLHAUB_3j7RMFOwLK-M,3616
101
- gllm_inference_binary-0.5.19.dist-info/METADATA,sha256=RYPC2mk8-uXt5cFOZh7VdB6ccfFPjZ7AjvSsCoCfZQI,4608
102
- gllm_inference_binary-0.5.19.dist-info/WHEEL,sha256=-FZBVKyKauScY3vLa8vJR6hBCpAJfFykw2MOwlNKr1g,98
103
- gllm_inference_binary-0.5.19.dist-info/RECORD,,
101
+ gllm_inference_binary-0.5.21.dist-info/METADATA,sha256=WQpwvWOgWHZ_SiRhj_yvS1h1AjLMKzyJ6QQTfvimSEw,4608
102
+ gllm_inference_binary-0.5.21.dist-info/WHEEL,sha256=-FZBVKyKauScY3vLa8vJR6hBCpAJfFykw2MOwlNKr1g,98
103
+ gllm_inference_binary-0.5.21.dist-info/RECORD,,