promptbuilder 0.4.29__py3-none-any.whl → 0.4.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,7 @@ class AiSuiteLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig = ThinkingConfig(),
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -230,6 +231,7 @@ class AiSuiteLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig = ThinkingConfig(),
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -200,6 +200,7 @@ class AnthropicLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -215,6 +216,9 @@ class AnthropicLLMClient(BaseLLMClient):
             "max_tokens": max_tokens,
             "messages": anthropic_messages,
         }
+
+        if timeout is not None:
+            anthropic_kwargs["timeout"] = timeout
 
         if thinking_config is None:
             thinking_config = self.default_thinking_config
@@ -453,6 +457,7 @@ class AnthropicLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -474,6 +479,9 @@ class AnthropicLLMClientAsync(BaseLLMClientAsync):
             "max_tokens": max_tokens,
             "messages": anthropic_messages,
         }
+
+        if timeout is not None:
+            anthropic_kwargs["timeout"] = timeout
 
         if thinking_config is None:
             thinking_config = self.default_thinking_config
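The same change is threaded through every provider client in this release: `create()` gains a `timeout: float | None` parameter (in seconds) that is forwarded to the underlying SDK only when set. A minimal usage sketch, assuming `get_client` is exported from `promptbuilder.llm_client` as in earlier releases; the model name is illustrative:

```python
from promptbuilder.llm_client import get_client
from promptbuilder.llm_client.types import Content, Part

# Hypothetical usage; the model name is illustrative.
client = get_client("anthropic:claude-3-5-sonnet-latest")
response = client.create(
    messages=[Content(parts=[Part(text="One-line summary of HTTP/1.1?")], role="user")],
    timeout=30.0,  # seconds; ends up in anthropic_kwargs["timeout"] above
)
print(response.text)
```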
@@ -82,6 +82,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
         autocomplete: bool = False
@@ -98,6 +99,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=tool_config,
         )
@@ -105,25 +107,28 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         total_count = BaseLLMClient._response_out_tokens(response)
 
         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-        while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
-            BaseLLMClient._append_generated_part(messages, response)
-
-            response = self._create(
-                messages=messages,
-                result_type=result_type,
-                thinking_config=thinking_config,
-                system_message=system_message,
-                max_tokens=max_tokens,
-                tools=tools,
-                tool_config=tool_config,
-            )
-            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-            total_count += BaseLLMClient._response_out_tokens(response)
-            if max_tokens is not None and total_count >= max_tokens:
-                break
-        if response.candidates:
-            BaseLLMClient._append_generated_part(messages, response)
-            response.candidates[0].content = messages[-1] if len(messages) > 0 else None
+        if autocomplete:
+            while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                BaseLLMClient._append_generated_part(messages, response)
+
+                response = self._create(
+                    messages=messages,
+                    result_type=result_type,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens,
+                    timeout=timeout,
+                    tools=tools,
+                    tool_config=tool_config,
+                )
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                total_count += BaseLLMClient._response_out_tokens(response)
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+            if response.candidates and response.candidates[0].content:
+                appended_message = BaseLLMClient._append_generated_part(messages, response)
+                if appended_message is not None:
+                    response.candidates[0].content = appended_message
         return response
 
     @logfire_decorators.create
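The autocomplete path is now gated behind `if autocomplete:` and the continuation requests forward `timeout` as well. The loop's shape: when a candidate stops with `MAX_TOKENS`, the partial output is appended to `messages` and the request is re-issued until the model finishes or the cumulative output reaches `max_tokens`. A standalone sketch of that continuation pattern, with `call_model` as a hypothetical stand-in for `self._create` and a response object assumed to expose these fields:

```python
# Sketch of the continuation loop; call_model, out_token_count, finish_reason
# and partial_content are hypothetical stand-ins for the real internals.
def complete_with_continuation(call_model, messages, max_budget=None):
    response = call_model(messages)
    total = response.out_token_count
    while response.finish_reason == "MAX_TOKENS":
        messages.append(response.partial_content)  # feed the partial answer back
        response = call_model(messages)            # ask the model to continue
        total += response.out_token_count
        if max_budget is not None and total >= max_budget:
            break                                  # overall budget exhausted
    return response
```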
@@ -138,6 +143,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -152,6 +158,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -165,6 +172,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -178,6 +186,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -191,6 +200,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool],
         tool_choice_mode: Literal["ANY"],
         autocomplete: bool = False,
@@ -204,6 +214,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_choice_mode: Literal["ANY", "NONE"] = "NONE",
         autocomplete: bool = False,
@@ -215,6 +226,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
         )
@@ -231,6 +243,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
             autocomplete=autocomplete,
@@ -245,7 +258,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
 
 
     @staticmethod
-    def _append_generated_part(messages: list[Content], response: Response):
+    def _append_generated_part(messages: list[Content], response: Response) -> Content | None:
         assert(response.candidates and response.candidates[0].content), "Response must contain at least one candidate with content."
 
         text_parts = [
@@ -262,7 +275,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             response_text = "".join(part.text for part in thought_parts)
             is_thought = True
         else:
-            raise ValueError("No text or thought found in the response parts.")
+            return None
 
         if len(messages) > 0 and messages[-1].role == "model":
             message_to_append = messages[-1]
@@ -274,6 +287,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             message_to_append.parts.append(Part(text=response_text, thought=is_thought))
         else:
             messages.append(Content(parts=[Part(text=response_text, thought=is_thought)], role="model"))
+        return messages[-1]
 
     @staticmethod
     def _response_out_tokens(response: Response):
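`_append_generated_part` previously raised `ValueError` when a response carried neither text nor thought parts (for example, a pure function-call turn); it now returns the appended `Content`, or `None` when there is nothing textual to merge, and the autocomplete code above uses that return value instead of peeking at `messages[-1]`. The caller-side contract, restated as a sketch:

```python
# Caller-side contract of the new Content | None return value (sketch).
appended = BaseLLMClient._append_generated_part(messages, response)
if appended is not None:
    # Point the first candidate at the merged model message.
    response.candidates[0].content = appended
# If None, the response had no text/thought parts, so nothing is rewritten.
```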
@@ -419,6 +433,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
         autocomplete: bool = False,
@@ -435,6 +450,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=tool_config,
         )
@@ -442,25 +458,28 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         total_count = BaseLLMClient._response_out_tokens(response)
 
         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-        while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
-            BaseLLMClient._append_generated_part(messages, response)
-
-            response = await self._create(
-                messages=messages,
-                result_type=result_type,
-                thinking_config=thinking_config,
-                system_message=system_message,
-                max_tokens=max_tokens,
-                tools=tools,
-                tool_config=tool_config,
-            )
-            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-            total_count += BaseLLMClient._response_out_tokens(response)
-            if max_tokens is not None and total_count >= max_tokens:
-                break
-        if response.candidates:
-            BaseLLMClient._append_generated_part(messages, response)
-            response.candidates[0].content = messages[-1] if len(messages) > 0 else None
+        if autocomplete:
+            while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                BaseLLMClient._append_generated_part(messages, response)
+
+                response = await self._create(
+                    messages=messages,
+                    result_type=result_type,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens,
+                    timeout=timeout,
+                    tools=tools,
+                    tool_config=tool_config,
+                )
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                total_count += BaseLLMClient._response_out_tokens(response)
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+            if response.candidates and response.candidates[0].content:
+                appended_message = BaseLLMClient._append_generated_part(messages, response)
+                if appended_message is not None:
+                    response.candidates[0].content = appended_message
         return response
 
     @logfire_decorators.create_async
@@ -475,6 +494,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -489,6 +509,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -502,6 +523,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -515,6 +537,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,
@@ -528,6 +551,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool],
         tool_choice_mode: Literal["ANY"],
         autocomplete: bool = False,
@@ -541,6 +565,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_choice_mode: Literal["ANY", "NONE"] = "NONE",
         autocomplete: bool = False,
@@ -552,6 +577,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
         )
@@ -568,6 +594,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
             autocomplete=autocomplete
@@ -111,6 +111,7 @@ class BedrockLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -120,7 +121,10 @@ class BedrockLLMClient(BaseLLMClient):
 
         if system_message is not None:
             bedrock_kwargs["system"] = [{"text": system_message}]
-
+
+        if timeout is not None:
+            bedrock_kwargs["timeout"] = timeout
+
         if max_tokens is None:
             max_tokens = self.default_max_tokens
         if max_tokens is not None:
@@ -407,6 +411,7 @@ class BedrockLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -417,6 +422,9 @@ class BedrockLLMClientAsync(BaseLLMClientAsync):
         if system_message is not None:
             bedrock_kwargs["system"] = [{"text": system_message}]
 
+        if timeout is not None:
+            bedrock_kwargs["timeout"] = timeout
+
         if max_tokens is None:
             max_tokens = self.default_max_tokens
         if max_tokens is not None:
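The Bedrock clients follow the same convention as the Anthropic ones: optional settings are written into the provider kwargs dict only when they are set, so the SDK's own defaults apply otherwise. A generic sketch of that pattern, with hypothetical helper and key names:

```python
# Generic optional-kwargs pattern used across the provider clients (sketch).
def build_provider_kwargs(system_message: str | None, timeout: float | None) -> dict:
    kwargs: dict = {}
    if system_message is not None:
        kwargs["system"] = [{"text": system_message}]
    if timeout is not None:
        kwargs["timeout"] = timeout  # key omitted entirely when None
    return kwargs

assert build_provider_kwargs(None, None) == {}
assert build_provider_kwargs(None, 10.0) == {"timeout": 10.0}
```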
@@ -57,7 +57,7 @@ class GoogleLLMClient(BaseLLMClient):
             raise ValueError("To create a google llm client you need to either set the environment variable GOOGLE_API_KEY or pass the api_key in string format")
         super().__init__(GoogleLLMClient.PROVIDER, model, decorator_configs=decorator_configs, default_thinking_config=default_thinking_config, default_max_tokens=default_max_tokens)
         self._api_key = api_key
-        self.client = Client(api_key=api_key)
+        self.client = Client(api_key=api_key, **kwargs)
 
     @property
     def api_key(self) -> str:
@@ -95,6 +95,7 @@ class GoogleLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -107,6 +108,9 @@ class GoogleLLMClient(BaseLLMClient):
             tools=tools,
             tool_config=tool_config,
         )
+        if timeout is not None:
+            # Google processes timeout via HttpOptions on the request/config
+            config.http_options = types.HttpOptions(timeout=int(timeout * 1_000))
 
         if thinking_config is None:
             thinking_config = self.default_thinking_config
@@ -233,8 +237,8 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
             raise ValueError("To create a google llm client you need to either set the environment variable GOOGLE_API_KEY or pass the api_key in string format")
         super().__init__(GoogleLLMClientAsync.PROVIDER, model, decorator_configs=decorator_configs, default_thinking_config=default_thinking_config, default_max_tokens=default_max_tokens)
         self._api_key = api_key
-        self.client = Client(api_key=api_key)
-
+        self.client = Client(api_key=api_key, **kwargs)
+
     @property
     def api_key(self) -> str:
         return self._api_key
@@ -248,6 +252,7 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -260,11 +265,13 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
             tools=tools,
             tool_config=tool_config,
         )
-
+        if timeout is not None:
+            config.http_options = types.HttpOptions(timeout=int(timeout * 1_000))
+
         if thinking_config is None:
             thinking_config = self.default_thinking_config
         config.thinking_config = thinking_config
-
+
         if result_type is None or result_type == "json":
             return await self.client.aio.models.generate_content(
                 model=self.model,
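`google-genai` expresses `HttpOptions.timeout` in milliseconds, which is why both Google clients multiply the seconds-based `timeout` by 1,000 before attaching it to the request config. The conversion in isolation, assuming the `google-genai` SDK (the helper name is illustrative):

```python
from google.genai import types

def http_options_for(timeout_s: float | None) -> types.HttpOptions | None:
    """Convert a seconds-based timeout into google-genai's millisecond HttpOptions."""
    if timeout_s is None:
        return None
    return types.HttpOptions(timeout=int(timeout_s * 1_000))

# 30.5 s becomes 30500 ms on the wire.
assert http_options_for(30.5).timeout == 30_500
```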
@@ -139,6 +139,7 @@ class LiteLLMLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -167,6 +168,9 @@ class LiteLLMLLMClient(BaseLLMClient):
 
         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            # LiteLLM supports request_timeout in seconds
+            kwargs["request_timeout"] = timeout
 
         if tools is not None:
             lite_tools = []
@@ -351,6 +355,7 @@ class LiteLLMLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -377,6 +382,8 @@ class LiteLLMLLMClientAsync(BaseLLMClientAsync):
 
         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            kwargs["request_timeout"] = timeout
 
         if tools is not None:
             lite_tools = []
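Both LiteLLM clients forward the value as `request_timeout`, which the in-code comment describes as LiteLLM's seconds-based request timeout. A hedged sketch of the equivalent direct call; the model name is illustrative and `request_timeout` is the parameter this diff relies on:

```python
import litellm

# Illustrative direct call showing the parameter the client forwards.
response = litellm.completion(
    model="openai/gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "ping"}],
    request_timeout=15.0,        # seconds; the request fails once exceeded
)
```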
@@ -10,7 +10,7 @@ from promptbuilder.llm_client.anthropic_client import AnthropicLLMClient, Anthro
 from promptbuilder.llm_client.openai_client import OpenaiLLMClient, OpenaiLLMClientAsync
 from promptbuilder.llm_client.bedrock_client import BedrockLLMClient, BedrockLLMClientAsync
 from promptbuilder.llm_client.aisuite_client import AiSuiteLLMClient, AiSuiteLLMClientAsync
-from promptbuilder.llm_client.litellm_client import LiteLLMLLMClient, LiteLLMLLMClientAsync
+from promptbuilder.llm_client.litellm_client import LiteLLMClient, LiteLLMClientAsync
 
 
 
@@ -24,14 +24,17 @@ def get_client(
     decorator_configs: DecoratorConfigs | None = None,
     default_thinking_config: ThinkingConfig | None = None,
     default_max_tokens: int | None = None,
+    **kwargs,
 ) -> BaseLLMClient:
     global _memory
 
-    kwargs = {
+    explicit_kwargs = {
         "decorator_configs": decorator_configs,
         "default_thinking_config": default_thinking_config,
         "default_max_tokens": default_max_tokens,
     }
+    # Merge explicit kwargs with additional kwargs, with explicit taking precedence
+    merged_kwargs = {**kwargs, **explicit_kwargs}
     provider_to_client_class: dict[str, type[BaseLLMClient]] = {
         "google": GoogleLLMClient,
         "anthropic": AnthropicLLMClient,
@@ -41,9 +44,9 @@
     provider, model = full_model_name.split(":", 1)
     if provider in provider_to_client_class:
         client_class = provider_to_client_class[provider]
-        client = client_class(model, api_key, **kwargs)
+        client = client_class(model, api_key, **merged_kwargs)
     else:
-        client = LiteLLMLLMClient(full_model_name, api_key, **kwargs)
+        client = LiteLLMClient(full_model_name, api_key, **merged_kwargs)
 
     if (full_model_name, client.api_key) in _memory:
         client = _memory[(full_model_name, client.api_key)]
@@ -65,14 +68,17 @@ def get_async_client(
     decorator_configs: DecoratorConfigs | None = None,
     default_thinking_config: ThinkingConfig | None = None,
     default_max_tokens: int | None = None,
+    **kwargs,
 ) -> BaseLLMClientAsync:
     global _memory_async
 
-    kwargs = {
+    explicit_kwargs = {
         "decorator_configs": decorator_configs,
         "default_thinking_config": default_thinking_config,
         "default_max_tokens": default_max_tokens,
     }
+    # Merge explicit kwargs with additional kwargs, with explicit taking precedence
+    merged_kwargs = {**kwargs, **explicit_kwargs}
     provider_to_client_class: dict[str, type[BaseLLMClientAsync]] = {
         "google": GoogleLLMClientAsync,
         "anthropic": AnthropicLLMClientAsync,
@@ -82,9 +88,9 @@
     provider, model = full_model_name.split(":", 1)
     if provider in provider_to_client_class:
         client_class = provider_to_client_class[provider]
-        client = client_class(model, api_key, **kwargs)
+        client = client_class(model, api_key, **merged_kwargs)
     else:
-        client = LiteLLMLLMClientAsync(full_model_name, api_key, **kwargs)
+        client = LiteLLMClientAsync(full_model_name, api_key, **merged_kwargs)
 
     if (full_model_name, client.api_key) in _memory_async:
         client = _memory_async[(full_model_name, client.api_key)]
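`get_client` and `get_async_client` now accept arbitrary `**kwargs` (for example, provider-specific constructor options such as those the Google clients now pass through to `Client`) and merge them with the named arguments. In `{**kwargs, **explicit_kwargs}` the later spread wins on key collisions, so the named arguments take precedence:

```python
# Dict-spread precedence: later entries override earlier ones on collision.
kwargs = {"default_max_tokens": 1024, "http_options": {"retries": 3}}  # illustrative extras
explicit_kwargs = {"decorator_configs": None, "default_max_tokens": None}

merged = {**kwargs, **explicit_kwargs}
assert merged["default_max_tokens"] is None       # explicit argument wins
assert merged["http_options"] == {"retries": 3}   # extras pass through untouched
```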
@@ -150,6 +150,7 @@ class OpenaiLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -205,6 +206,9 @@ class OpenaiLLMClient(BaseLLMClient):
             openai_kwargs["tool_choice"] = "required"
 
         if result_type is None or result_type == "json":
+            # Forward timeout to OpenAI per-request if provided
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = self.client.responses.create(**openai_kwargs)
 
             parts: list[Part] = []
@@ -227,6 +231,8 @@ class OpenaiLLMClient(BaseLLMClient):
                 ),
             )
         elif isinstance(result_type, type(BaseModel)):
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = self.client.responses.parse(**openai_kwargs, text_format=result_type)
 
             parts: list[Part] = []
@@ -385,6 +391,7 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
@@ -447,8 +454,9 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
             openai_kwargs["tool_choice"] = "required"
 
         if result_type is None or result_type == "json":
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = await self.client.responses.create(**openai_kwargs)
-
             parts: list[Part] = []
             for output_item in response.output:
                 if output_item.type == "message":
@@ -469,6 +477,8 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
                 ),
             )
         elif isinstance(result_type, type(BaseModel)):
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = await self.client.responses.parse(**openai_kwargs, text_format=result_type)
 
             parts: list[Part] = []
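The OpenAI clients attach `timeout` to the per-call kwargs for both `responses.create` and `responses.parse`; the official `openai` Python SDK accepts a per-request `timeout` override on resource methods. A minimal sketch, with an illustrative model name:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Per-request override of the client-level timeout, in seconds.
response = client.responses.create(
    model="gpt-4o-mini",  # illustrative model name
    input="Say hi in five words.",
    timeout=20.0,
)
print(response.output_text)
```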
@@ -0,0 +1,394 @@
+import os
+import importlib
+from functools import wraps
+from typing import AsyncIterator, Iterator, Callable, ParamSpec, Awaitable, Any, cast
+
+from pydantic import BaseModel, ConfigDict
+from tenacity import RetryError
+
+from vertexai import init as vertex_init
+from vertexai.generative_models import GenerativeModel
+
+from promptbuilder.llm_client.base_client import BaseLLMClient, BaseLLMClientAsync, ResultType
+from promptbuilder.llm_client.types import (
+    Response,
+    Content,
+    Candidate,
+    UsageMetadata,
+    Part,
+    PartLike,
+    ApiKey,
+    ThinkingConfig,
+    Tool,
+    ToolConfig,
+    Model,
+    CustomApiKey,
+)
+from promptbuilder.llm_client.config import DecoratorConfigs
+from promptbuilder.llm_client.utils import inherited_decorator
+from promptbuilder.llm_client.exceptions import APIError
+
+
+P = ParamSpec("P")
+
+
+class VertexApiKey(BaseModel, CustomApiKey):
+    model_config = ConfigDict(frozen=True)
+    project: str
+    location: str
+
+
+@inherited_decorator
+def _error_handler(func: Callable[P, Response]) -> Callable[P, Response]:
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except RetryError as retry_error:
+            e = retry_error.last_attempt._exception
+            if e is None:
+                raise APIError()
+            code = getattr(e, "code", None)
+            response_json = {
+                "status": getattr(e, "status", None),
+                "message": str(e),
+            }
+            response = getattr(e, "response", None)
+            raise APIError(code, response_json, response)
+        except Exception as e:  # noqa: BLE001
+            raise APIError(None, {"status": None, "message": str(e)}, None)
+    return wrapper
+
+
+def _to_vertex_content(messages: list[Content]):
+    gen_mod = importlib.import_module("vertexai.generative_models")
+    VPart = getattr(gen_mod, "Part")
+    VContent = getattr(gen_mod, "Content")
+    v_messages: list[Any] = []
+    for m in messages:
+        v_parts: list[Any] = []
+        if m.parts:
+            for p in m.parts:
+                if p.text is not None:
+                    v_parts.append(VPart.from_text(p.text))
+                elif p.inline_data is not None and p.inline_data.data is not None:
+                    v_parts.append(VPart.from_bytes(data=p.inline_data.data, mime_type=p.inline_data.mime_type or "application/octet-stream"))
+        v_messages.append(VContent(role=m.role, parts=v_parts))
+    return v_messages
+
+
+def _tool_to_vertex(tool: Tool):
+    VTool = getattr(importlib.import_module("vertexai.generative_models"), "Tool")
+    if not tool.function_declarations:
+        return VTool(function_declarations=[])
+    fds = []
+    for fd in tool.function_declarations:
+        fds.append({
+            "name": fd.name,
+            "description": fd.description,
+            "parameters": fd.parameters.model_dump() if fd.parameters is not None else None,
+            "response": fd.response.model_dump() if fd.response is not None else None,
+        })
+    return VTool(function_declarations=fds)
+
+
+def _tool_config_to_vertex(cfg: ToolConfig | None):
+    VToolConfig = getattr(importlib.import_module("vertexai.generative_models"), "ToolConfig")
+    if cfg is None or cfg.function_calling_config is None:
+        return None
+    mode = cfg.function_calling_config.mode or "AUTO"
+    allowed = cfg.function_calling_config.allowed_function_names
+    return VToolConfig(function_calling_config={"mode": mode, "allowedFunctionNames": allowed})
+
+
+def _from_vertex_response(v_resp: Any) -> Response:
+    candidates: list[Candidate] = []
+    if getattr(v_resp, "candidates", None):
+        for c in v_resp.candidates:
+            parts: list[Part] = []
+            if c.content and getattr(c.content, "parts", None):
+                for vp in c.content.parts:
+                    t = getattr(vp, "text", None)
+                    if isinstance(t, str):
+                        parts.append(Part(text=t))
+            candidates.append(Candidate(content=Content(parts=cast(list[Part | PartLike], parts), role="model")))
+
+    usage = None
+    um = getattr(v_resp, "usage_metadata", None)
+    if um is not None:
+        usage = UsageMetadata(
+            cached_content_token_count=getattr(um, "cached_content_token_count", None),
+            candidates_token_count=getattr(um, "candidates_token_count", None),
+            prompt_token_count=getattr(um, "prompt_token_count", None),
+            thoughts_token_count=getattr(um, "thoughts_token_count", None),
+            total_token_count=getattr(um, "total_token_count", None),
+        )
+
+    return Response(candidates=candidates, usage_metadata=usage)
+
+
+class VertexLLMClient(BaseLLMClient):
+    PROVIDER: str = "vertexai"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: ApiKey | None = None,
+        decorator_configs: DecoratorConfigs | None = None,
+        default_thinking_config: ThinkingConfig | None = None,
+        default_max_tokens: int | None = None,
+        project: str | None = None,
+        location: str | None = None,
+        **kwargs,
+    ):
+        # Resolve project/location from args or env
+        project = project or os.getenv("VERTEXAI_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCLOUD_PROJECT")
+        location = location or os.getenv("VERTEXAI_LOCATION") or os.getenv("GOOGLE_CLOUD_REGION") or os.getenv("GOOGLE_CLOUD_LOCATION")
+
+        # Allow API Key (string) or ADC (VertexApiKey)
+        api_key_str: str | None = None
+        if isinstance(api_key, str):
+            api_key_str = api_key
+        elif api_key is None:
+            # Fallback to env vars for API key
+            api_key_str = os.getenv("VERTEX_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        elif isinstance(api_key, VertexApiKey):
+            # ADC path with explicit project/location
+            pass
+        else:
+            # Unexpected CustomApiKey subtype
+            raise ValueError("Unsupported api_key type for Vertex: expected str or VertexApiKey")
+
+        if not project or not location:
+            raise ValueError("To create a vertexai llm client you need to provide project and location via args or env vars VERTEXAI_PROJECT and VERTEXAI_LOCATION")
+
+        if not isinstance(api_key, VertexApiKey):
+            api_key = VertexApiKey(project=project, location=location)
+
+        super().__init__(
+            VertexLLMClient.PROVIDER,
+            model,
+            decorator_configs=decorator_configs,
+            default_thinking_config=default_thinking_config,
+            default_max_tokens=default_max_tokens,
+        )
+        self._api_key = api_key
+        self._api_key_str = api_key_str
+
+        vertex_init(project=self._api_key.project, location=self._api_key.location)
+        self._model = GenerativeModel(self.model)
+
+    @property
+    def api_key(self) -> VertexApiKey:
+        return self._api_key
+
+    @_error_handler
+    def _create(
+        self,
+        messages: list[Content],
+        result_type: ResultType = None,
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ) -> Response:
+        v_messages = _to_vertex_content(messages)
+        GenerationConfig = getattr(importlib.import_module("vertexai.generative_models"), "GenerationConfig")
+        gen_cfg = GenerationConfig(max_output_tokens=max_tokens or self.default_max_tokens)
+        req_opts: dict[str, Any] | None = {}
+        if timeout is not None:
+            req_opts["timeout"] = timeout
+        if self._api_key_str:
+            req_opts["api_key"] = self._api_key_str
+        if not req_opts:
+            req_opts = None
+
+        v_tools = None
+        if tools is not None:
+            v_tools = [_tool_to_vertex(t) for t in tools]
+        v_tool_cfg = _tool_config_to_vertex(tool_config)
+
+        v_resp = self._model.generate_content(
+            contents=v_messages,
+            generation_config=gen_cfg,
+            tools=v_tools,
+            tool_config=v_tool_cfg,
+            system_instruction=system_message,
+            request_options=req_opts,
+        )
+
+        resp = _from_vertex_response(v_resp)
+        if result_type == "json" and resp.text is not None:
+            resp.parsed = BaseLLMClient.as_json(resp.text)
+        elif isinstance(result_type, type(BaseModel)) and resp.text is not None:
+            parsed = BaseLLMClient.as_json(resp.text)
+            resp.parsed = result_type.model_validate(parsed)
+        return resp
+
+    def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+    ) -> Iterator[Response]:
+        v_messages = _to_vertex_content(messages)
+        GenerationConfig = getattr(importlib.import_module("vertexai.generative_models"), "GenerationConfig")
+        gen_cfg = GenerationConfig(max_output_tokens=max_tokens or self.default_max_tokens)
+
+        # Handle thinking config
+        if thinking_config is None:
+            thinking_config = self.default_thinking_config
+        if thinking_config is not None:
+            # Store for potential future use when Vertex AI supports thinking features
+            pass
+
+        req_opts: dict[str, Any] | None = {}
+        if self._api_key_str:
+            req_opts["api_key"] = self._api_key_str
+        if not req_opts:
+            req_opts = None
+        stream = self._model.generate_content(
+            contents=v_messages,
+            generation_config=gen_cfg,
+            system_instruction=system_message,
+            request_options=req_opts,
+            stream=True,
+        )
+        for ev in stream:
+            yield _from_vertex_response(ev)
+
+    @staticmethod
+    def models_list() -> list[Model]:
+        return []
+
+
+@inherited_decorator
+def _error_handler_async(func: Callable[P, Awaitable[Response]]) -> Callable[P, Awaitable[Response]]:
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        try:
+            return await func(*args, **kwargs)
+        except RetryError as retry_error:
+            e = retry_error.last_attempt._exception
+            if e is None:
+                raise APIError()
+            code = getattr(e, "code", None)
+            response_json = {
+                "status": getattr(e, "status", None),
+                "message": str(e),
+            }
+            response = getattr(e, "response", None)
+            raise APIError(code, response_json, response)
+        except Exception as e:  # noqa: BLE001
+            raise APIError(None, {"status": None, "message": str(e)}, None)
+    return wrapper
+
+
+class VertexLLMClientAsync(BaseLLMClientAsync):
+    PROVIDER: str = "vertexai"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: ApiKey | None = None,
+        decorator_configs: DecoratorConfigs | None = None,
+        default_thinking_config: ThinkingConfig | None = None,
+        default_max_tokens: int | None = None,
+        project: str | None = None,
+        location: str | None = None,
+        **kwargs,
+    ):
+        project = project or os.getenv("VERTEXAI_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCLOUD_PROJECT")
+        location = location or os.getenv("VERTEXAI_LOCATION") or os.getenv("GOOGLE_CLOUD_REGION") or os.getenv("GOOGLE_CLOUD_LOCATION")
+
+        api_key_str: str | None = None
+        if isinstance(api_key, str):
+            api_key_str = api_key
+        elif api_key is None:
+            api_key_str = os.getenv("VERTEX_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        elif isinstance(api_key, VertexApiKey):
+            pass
+        else:
+            raise ValueError("Unsupported api_key type for Vertex: expected str or VertexApiKey")
+
+        if not project or not location:
+            raise ValueError("To create a vertexai llm client you need to provide project and location via args or env vars VERTEXAI_PROJECT and VERTEXAI_LOCATION")
+
+        if not isinstance(api_key, VertexApiKey):
+            api_key = VertexApiKey(project=project, location=location)
+
+        super().__init__(
+            VertexLLMClientAsync.PROVIDER,
+            model,
+            decorator_configs=decorator_configs,
+            default_thinking_config=default_thinking_config,
+            default_max_tokens=default_max_tokens,
+        )
+        self._api_key = api_key
+        self._api_key_str = api_key_str
+
+        vertex_init(project=self._api_key.project, location=self._api_key.location)
+        self._model = GenerativeModel(self.model)
+
+    @property
+    def api_key(self) -> VertexApiKey:
+        return self._api_key
+
+    @_error_handler_async
+    async def _create(
+        self,
+        messages: list[Content],
+        result_type: ResultType = None,
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ) -> Response:
+        # Reuse sync implementation (SDK is sync). For real async, offload to thread.
+        client = VertexLLMClient(
+            model=self.model,
+            api_key=self._api_key,
+            decorator_configs=self._decorator_configs,
+            default_thinking_config=self.default_thinking_config,
+            default_max_tokens=self.default_max_tokens,
+        )
+        return client._create(
+            messages=messages,
+            result_type=result_type,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens,
+            timeout=timeout,
+            tools=tools,
+            tool_config=tool_config,
+        )
+
+    async def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+    ) -> AsyncIterator[Response]:
+        # Provide a simple wrapper yielding once (non-streaming)
+        resp = await self._create(
+            messages=messages,
+            result_type=None,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens,
+        )
+        yield resp
+
+    @staticmethod
+    def models_list() -> list[Model]:
+        return VertexLLMClient.models_list()
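The new `vertexai` provider identifies a client by `project`/`location` (wrapped in the frozen `VertexApiKey` model so it can serve as a cache key) and falls back to Application Default Credentials when no API-key string is supplied. A construction sketch with illustrative project and region values; both may also come from the `VERTEXAI_PROJECT`/`VERTEXAI_LOCATION` environment variables:

```python
from promptbuilder.llm_client.vertex_client import VertexLLMClient
from promptbuilder.llm_client.types import Content, Part

# ADC path: no API-key string, explicit project/location (illustrative values).
client = VertexLLMClient(
    "gemini-1.5-pro",          # illustrative model name
    project="my-gcp-project",
    location="us-central1",
)
response = client.create(messages=[Content(parts=[Part(text="Hello")], role="user")])
print(response.text)
```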
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: promptbuilder
-Version: 0.4.29
+Version: 0.4.31
 Summary: Library for building prompts for LLMs
 Home-page: https://github.com/kapulkin/promptbuilder
 Author: Kapulkin Stanislav
@@ -18,6 +18,9 @@ Requires-Dist: google-genai>=1.4.0
 Requires-Dist: anthropic
 Requires-Dist: openai
 Requires-Dist: aioboto3
+Requires-Dist: litellm
+Requires-Dist: httpx
+Requires-Dist: aiohttp
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -7,21 +7,22 @@ promptbuilder/agent/context.py,sha256=CVw715vFrhfvddQmRNy4A1U87GsZyIKj9Xu4SCidbc
 promptbuilder/agent/tool.py,sha256=VDbIHK3_Q62Ei7hwLF7nIgHq-PTMKnv1NSjHpDYkUZE,2651
 promptbuilder/agent/utils.py,sha256=vTkphKw04v_QDIJtoB2JKK0RGY6iI1t_0LbmuStunzI,356
 promptbuilder/llm_client/__init__.py,sha256=wJ33cnRtZX_YPsbcGxEu3SEZMOhPX7-fHI59MEPUe7I,517
-promptbuilder/llm_client/aisuite_client.py,sha256=_TfB1xv73kIn4n8ulV3bj8bHOVm4DOD5uIdX-pbYoXY,15563
-promptbuilder/llm_client/anthropic_client.py,sha256=HSIAZVOQoi3hinjhBVGqpt91k0x38xj6EUsPSUrlAA0,28076
-promptbuilder/llm_client/base_client.py,sha256=T-51WbVuMktCdoI0Z7Mk4LcaqV6GRqoDiZeZJPKoimI,28763
-promptbuilder/llm_client/bedrock_client.py,sha256=rJMzVV7x_sNJ1nVVqWU1sU-Pq7xlxFxIa_hTD2wtM1Y,27943
+promptbuilder/llm_client/aisuite_client.py,sha256=8inY3UoH8o9yEOvRYP6a_8pjGQK0W_f9eV8MmHzpKTU,15641
+promptbuilder/llm_client/anthropic_client.py,sha256=GL5FRmqu2iQqU44joaviEaRpEp4h_USpUiYc8sWu52Y,28326
+promptbuilder/llm_client/base_client.py,sha256=x9s_pyOiOWlSjTnRo162GWcI4pILoCCwomFoLGrn0RU,29922
+promptbuilder/llm_client/bedrock_client.py,sha256=PGb7KxaK0QwhsZ9frz07h7I2zeyjMMWqIYC7DS6AZp0,28181
 promptbuilder/llm_client/config.py,sha256=exQEm35wp7lK5SfXNpN5H9VZEb2LVa4pyZ-cxGt1U-U,1124
 promptbuilder/llm_client/exceptions.py,sha256=t-X7r_a8B1jNu8eEavde1jXu5dz97yV3IG4YHOtgh0Y,4836
-promptbuilder/llm_client/google_client.py,sha256=GzKd_EeJY_GEoZrI6I3ZUAk1PRDBBkdJfEPUKgmlUXM,11782
-promptbuilder/llm_client/litellm_client.py,sha256=WfObiNTzgu4CFPUNeN4TmNBC6o_dPmB5P9DI5k3vcRg,25284
+promptbuilder/llm_client/google_client.py,sha256=ZjJjDUQZH6zAIRoi4xUx3IDEm8jRkVWGyehy5P_Ba_M,12170
+promptbuilder/llm_client/litellm_client.py,sha256=XoYZmeU8XuROhvzVqbdjaWPktOSVKjehIAZgC1C6Lgo,25585
 promptbuilder/llm_client/logfire_decorators.py,sha256=un_QnIekypOEcqTZ5v1y9pwijGnF95xwnwKO5rFSHVY,9667
-promptbuilder/llm_client/main.py,sha256=m-9jM2IYMFy6aZBUmPb52wpFlIK0H1aRj293oFmxLjU,7907
-promptbuilder/llm_client/openai_client.py,sha256=lT0RCiixJBoCtzUbL_0J5NQ5G8KGONzK3dQ73_NgL78,24789
+promptbuilder/llm_client/main.py,sha256=2Q7J5FwivX2YwvptzoSEtCfvfcI9p5HC55D3mMb2se4,8243
+promptbuilder/llm_client/openai_client.py,sha256=QMXX7VPYWFo1VvX8bWF6jpi95ZIOk_MMBpz-14GrT-k,25274
 promptbuilder/llm_client/types.py,sha256=kgbg5FRzvZwu98y1OhAZJDneXBNPnsFZueQCr9HXIY4,8063
 promptbuilder/llm_client/utils.py,sha256=79lvSppjrrItHB5MIozbp_5Oq7TsOK4Qzt9Ae3XMLFw,7624
-promptbuilder-0.4.29.dist-info/licenses/LICENSE,sha256=fqXmInzgsvEOIaKSBgcrwKyYCGYF0MKErJ0YivtODcc,1096
-promptbuilder-0.4.29.dist-info/METADATA,sha256=rfxQaTbrKl5se6wrV6b2QfMDhtSrxG7WbkG06fGtNCo,3729
-promptbuilder-0.4.29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-promptbuilder-0.4.29.dist-info/top_level.txt,sha256=UBVcYn4UgrPy3O3fmmnPEU_kieuplBMgheetIMei4EI,14
-promptbuilder-0.4.29.dist-info/RECORD,,
+promptbuilder/llm_client/vertex_client.py,sha256=aewidTryIpFMlTRFmDqOG7O-NCbvTP5wW6I3-3vQShE,15002
+promptbuilder-0.4.31.dist-info/licenses/LICENSE,sha256=fqXmInzgsvEOIaKSBgcrwKyYCGYF0MKErJ0YivtODcc,1096
+promptbuilder-0.4.31.dist-info/METADATA,sha256=bcAusvEhtctjGM_TGcZb6cpjbmD3BdSj6ajd6gawga0,3799
+promptbuilder-0.4.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+promptbuilder-0.4.31.dist-info/top_level.txt,sha256=UBVcYn4UgrPy3O3fmmnPEU_kieuplBMgheetIMei4EI,14
+promptbuilder-0.4.31.dist-info/RECORD,,