promptlayer 1.0.35__py3-none-any.whl → 1.0.78__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
promptlayer/utils.py CHANGED
@@ -3,29 +3,38 @@ import contextvars
  import datetime
  import functools
  import json
+ import logging
  import os
- import sys
  import types
+ from contextlib import asynccontextmanager
  from copy import deepcopy
  from enum import Enum
- from typing import (
-     Any,
-     AsyncGenerator,
-     AsyncIterable,
-     Callable,
-     Dict,
-     Generator,
-     List,
-     Optional,
-     Union,
- )
+ from typing import Any, Callable, Coroutine, Dict, List, Optional, Union
+ from urllib.parse import quote
+ from uuid import uuid4

  import httpx
  import requests
+ import urllib3
+ import urllib3.util
  from ably import AblyRealtime
  from ably.types.message import Message
+ from centrifuge import (
+     Client,
+     PublicationContext,
+     SubscriptionEventHandler,
+     SubscriptionState,
+ )
  from opentelemetry import context, trace
+ from tenacity import (
+     before_sleep_log,
+     retry,
+     retry_if_exception,
+     stop_after_attempt,
+     wait_exponential,
+ )

+ from promptlayer import exceptions as _exceptions
  from promptlayer.types import RequestLog
  from promptlayer.types.prompt_template import (
      GetPromptTemplate,
@@ -35,112 +44,341 @@ from promptlayer.types.prompt_template import (
      PublishPromptTemplateResponse,
  )

- URL_API_PROMPTLAYER = os.environ.setdefault(
-     "URL_API_PROMPTLAYER", "https://api.promptlayer.com"
+ # Configuration
+ RERAISE_ORIGINAL_EXCEPTION = os.getenv("PROMPTLAYER_RE_RAISE_ORIGINAL_EXCEPTION", "False").lower() == "true"
+ RAISE_FOR_STATUS = os.getenv("PROMPTLAYER_RAISE_FOR_STATUS", "False").lower() == "true"
+ DEFAULT_HTTP_TIMEOUT = 5
+
+ WORKFLOW_RUN_URL_TEMPLATE = "{base_url}/workflows/{workflow_id}/run"
+ WORKFLOW_RUN_CHANNEL_NAME_TEMPLATE = "workflows:{workflow_id}:run:{channel_name_suffix}"
+ SET_WORKFLOW_COMPLETE_MESSAGE = "SET_WORKFLOW_COMPLETE"
+ WS_TOKEN_REQUEST_LIBRARY_URL = (
+     f"{os.getenv('PROMPTLAYER_BASE_URL', 'https://api.promptlayer.com')}/ws-token-request-library"
  )


- async def arun_workflow_request(
+ logger = logging.getLogger(__name__)
+
+
+ class FinalOutputCode(Enum):
+     OK = "OK"
+     EXCEEDS_SIZE_LIMIT = "EXCEEDS_SIZE_LIMIT"
+
+
+ def should_retry_error(exception):
+     """Check if an exception should trigger a retry.
+
+     Only retries on server errors (5xx) and rate limits (429).
+     """
+     if hasattr(exception, "response"):
+         response = exception.response
+         if hasattr(response, "status_code"):
+             status_code = response.status_code
+             if status_code >= 500 or status_code == 429:
+                 return True
+
+     if isinstance(exception, (_exceptions.PromptLayerInternalServerError, _exceptions.PromptLayerRateLimitError)):
+         return True
+
+     return False
+
+
+ def retry_on_api_error(func):
+     return retry(
+         retry=retry_if_exception(should_retry_error),
+         stop=stop_after_attempt(4),  # 4 total attempts (1 initial + 3 retries)
+         wait=wait_exponential(multiplier=2, max=15),  # 2s, 4s, 8s
+         before_sleep=before_sleep_log(logger, logging.WARNING),
+         reraise=True,
+     )(func)
+
+
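
The `retry_on_api_error` decorator above is a thin tenacity wrapper: a decorated call makes up to four attempts with exponential backoff (roughly 2s, 4s, 8s) whenever `should_retry_error` matches, and re-raises the last exception. A minimal sketch of the behavior; `fetch` and the `_Fake*` classes are hypothetical stand-ins, not part of the package:

    import logging

    logging.basicConfig(level=logging.WARNING)

    class _FakeResponse:
        def __init__(self, status_code):
            self.status_code = status_code

    class _FakeHTTPError(Exception):
        # Hypothetical error carrying a `.response`, the shape should_retry_error inspects.
        def __init__(self, status_code):
            super().__init__(f"HTTP {status_code}")
            self.response = _FakeResponse(status_code)

    attempts = {"n": 0}

    @retry_on_api_error
    def fetch():
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise _FakeHTTPError(503)  # 5xx: should_retry_error returns True, so tenacity retries
        return "ok"

    print(fetch())  # sleeps ~2s then ~4s between attempts; a 404 would re-raise immediately
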
+ def _get_http_timeout():
+     try:
+         return float(os.getenv("PROMPTLAYER_HTTP_TIMEOUT", DEFAULT_HTTP_TIMEOUT))
+     except (ValueError, TypeError):
+         return DEFAULT_HTTP_TIMEOUT
+
+
+ def _make_httpx_client():
+     return httpx.AsyncClient(timeout=_get_http_timeout())
+
+
+ def _make_simple_httpx_client():
+     return httpx.Client(timeout=_get_http_timeout())
+
+
+ def _get_workflow_workflow_id_or_name(workflow_id_or_name, workflow_name):
+     # This is backward compatibility code
+     if (workflow_id_or_name := workflow_name if workflow_id_or_name is None else workflow_id_or_name) is None:
+         raise ValueError('Either "workflow_id_or_name" or "workflow_name" must be provided')
+
+     return workflow_id_or_name
+
+
+ async def _get_final_output(
+     base_url: str, execution_id: int, return_all_outputs: bool, *, headers: Dict[str, str]
+ ) -> Dict[str, Any]:
+     async with httpx.AsyncClient() as client:
+         response = await client.get(
+             f"{base_url}/workflow-version-execution-results",
+             headers=headers,
+             params={"workflow_version_execution_id": execution_id, "return_all_outputs": return_all_outputs},
+         )
+         if response.status_code != 200:
+             raise_on_bad_response(response, "PromptLayer had the following error while getting workflow results")
+         return response.json()
+
+
+ # TODO(dmu) MEDIUM: Consider putting all these functions into a class, so we do not have to pass
+ # `authorization_headers` into each function
+ async def _resolve_workflow_id(base_url: str, workflow_id_or_name: Union[int, str], headers):
+     if isinstance(workflow_id_or_name, int):
+         return workflow_id_or_name
+
+     # TODO(dmu) LOW: Should we warn user here to avoid using workflow names in favor of workflow id?
+     async with _make_httpx_client() as client:
+         # TODO(dmu) MEDIUM: Generalize the way we make async calls to PromptLayer API and reuse it everywhere
+         response = await client.get(f"{base_url}/workflows/{workflow_id_or_name}", headers=headers)
+         if response.status_code != 200:
+             raise_on_bad_response(response, "PromptLayer had the following error while resolving workflow")
+
+         return response.json()["workflow"]["id"]
+
+
+ async def _get_ably_token(base_url: str, channel_name, authentication_headers):
+     try:
+         async with _make_httpx_client() as client:
+             response = await client.post(
+                 f"{base_url}/ws-token-request-library",
+                 headers=authentication_headers,
+                 params={"capability": channel_name},
+             )
+             if response.status_code != 201:
+                 raise_on_bad_response(
+                     response,
+                     "PromptLayer had the following error while getting WebSocket token",
+                 )
+             return response.json()
+     except Exception as ex:
+         error_message = f"Failed to get WebSocket token: {ex}"
+         logger.exception(error_message)
+         if RERAISE_ORIGINAL_EXCEPTION:
+             raise
+         else:
+             raise _exceptions.PromptLayerAPIError(error_message, response=None, body=None) from ex
+
+
+ def _make_message_listener(base_url: str, results_future, execution_id_future, return_all_outputs, headers):
+     # We need this function to be mocked by unittests
+     async def message_listener(message: Message):
+         if results_future.cancelled() or message.name != SET_WORKFLOW_COMPLETE_MESSAGE:
+             return  # TODO(dmu) LOW: Do we really need this check?
+
+         execution_id = await asyncio.wait_for(execution_id_future, _get_http_timeout() * 1.1)
+         message_data = json.loads(message.data)
+         if message_data["workflow_version_execution_id"] != execution_id:
+             return
+
+         if (result_code := message_data.get("result_code")) in (FinalOutputCode.OK.value, None):
+             results = message_data["final_output"]
+         elif result_code == FinalOutputCode.EXCEEDS_SIZE_LIMIT.value:
+             results = await _get_final_output(base_url, execution_id, return_all_outputs, headers=headers)
+         else:
+             raise NotImplementedError(f"Unsupported final output code: {result_code}")
+
+         results_future.set_result(results)
+
+     return message_listener
+
+
+ async def _subscribe_to_workflow_completion_channel(
+     base_url: str, channel, execution_id_future, return_all_outputs, headers
+ ):
+     results_future = asyncio.Future()
+     message_listener = _make_message_listener(
+         base_url, results_future, execution_id_future, return_all_outputs, headers
+     )
+     await channel.subscribe(SET_WORKFLOW_COMPLETE_MESSAGE, message_listener)
+     return results_future, message_listener
+
+
+ async def _post_workflow_id_run(
      *,
-     workflow_name: str,
+     base_url: str,
+     authentication_headers,
+     workflow_id,
      input_variables: Dict[str, Any],
-     metadata: Optional[Dict[str, Any]] = None,
-     workflow_label_name: Optional[str] = None,
-     workflow_version_number: Optional[int] = None,
-     api_key: str,
-     return_all_outputs: Optional[bool] = False,
-     timeout: Optional[int] = 120,
- ) -> Dict[str, Any]:
+     metadata: Dict[str, Any],
+     workflow_label_name: str,
+     workflow_version_number: int,
+     return_all_outputs: bool,
+     channel_name_suffix: str,
+     _url_template: str = WORKFLOW_RUN_URL_TEMPLATE,
+ ):
+     url = _url_template.format(base_url=base_url, workflow_id=workflow_id)
      payload = {
          "input_variables": input_variables,
          "metadata": metadata,
          "workflow_label_name": workflow_label_name,
          "workflow_version_number": workflow_version_number,
          "return_all_outputs": return_all_outputs,
+         "channel_name_suffix": channel_name_suffix,
      }
-
-     url = f"{URL_API_PROMPTLAYER}/workflows/{workflow_name}/run"
-     headers = {"X-API-KEY": api_key}
-
      try:
-         async with httpx.AsyncClient() as client:
-             response = await client.post(url, json=payload, headers=headers)
+         async with _make_httpx_client() as client:
+             response = await client.post(url, json=payload, headers=authentication_headers)
          if response.status_code != 201:
-             raise_on_bad_response(
-                 response,
-                 "PromptLayer had the following error while running your workflow",
-             )
+             raise_on_bad_response(response, "PromptLayer had the following error while running your workflow")

          result = response.json()
-         warning = result.get("warning")
-         if warning:
-             print(f"WARNING: {warning}")
-
-     except Exception as e:
-         error_message = f"Failed to run workflow: {str(e)}"
-         print(error_message)
-         raise Exception(error_message)
+         if warning := result.get("warning"):
+             logger.warning(f"{warning}")
+     except Exception as ex:
+         error_message = f"Failed to run workflow: {str(ex)}"
+         logger.exception(error_message)
+         if RERAISE_ORIGINAL_EXCEPTION:
+             raise
+         else:
+             raise _exceptions.PromptLayerAPIError(error_message, response=None, body=None) from ex

-     execution_id = result.get("workflow_version_execution_id")
-     if not execution_id:
-         raise Exception("No execution ID returned from workflow run")
+     return result.get("workflow_version_execution_id")

-     channel_name = f"workflow_updates:{execution_id}"

-     # Get WebSocket token
+ async def _wait_for_workflow_completion(channel, results_future, message_listener, timeout):
+     # We need this function for mocking in unittests
      try:
-         async with httpx.AsyncClient() as client:
-             ws_response = await client.post(
-                 f"{URL_API_PROMPTLAYER}/ws-token-request-library",
-                 headers=headers,
-                 params={"capability": channel_name},
-             )
-             if ws_response.status_code != 201:
-                 raise_on_bad_response(
-                     ws_response,
-                     "PromptLayer had the following error while getting WebSocket token",
-                 )
-             token_details = ws_response.json()["token_details"]
-     except Exception as e:
-         error_message = f"Failed to get WebSocket token: {e}"
-         print(error_message)
-         raise Exception(error_message)
+         return await asyncio.wait_for(results_future, timeout)
+     except asyncio.TimeoutError:
+         raise _exceptions.PromptLayerAPITimeoutError(
+             "Workflow execution did not complete properly", response=None, body=None
+         )
+     finally:
+         channel.unsubscribe(SET_WORKFLOW_COMPLETE_MESSAGE, message_listener)

-     # Initialize Ably client
-     ably_client = AblyRealtime(token=token_details["token"])

-     # Subscribe to the channel named after the execution ID
-     channel = ably_client.channels.get(channel_name)
+ def _make_channel_name_suffix():
+     # We need this function for mocking in unittests
+     return uuid4().hex

-     final_output = {}
-     message_received_event = asyncio.Event()

-     async def message_listener(message: Message):
-         if message.name == "set_workflow_node_output":
-             data = json.loads(message.data)
-             if data.get("status") == "workflow_complete":
-                 final_output.update(data.get("final_output", {}))
-                 message_received_event.set()
+ MessageCallback = Callable[[Message], Coroutine[None, None, None]]
+

-     # Subscribe to the channel
-     await channel.subscribe("set_workflow_node_output", message_listener)
+ class SubscriptionEventLoggerHandler(SubscriptionEventHandler):
+     def __init__(self, callback: MessageCallback):
+         self.callback = callback

-     # Wait for the message or timeout
+     async def on_publication(self, ctx: PublicationContext):
+         message_name = ctx.pub.data.get("message_name", "unknown")
+         data = ctx.pub.data.get("data", "")
+         message = Message(name=message_name, data=data)
+         await self.callback(message)
+
+
+ @asynccontextmanager
+ async def centrifugo_client(address: str, token: str):
+     client = Client(address, token=token)
      try:
-         await asyncio.wait_for(message_received_event.wait(), timeout)
-     except asyncio.TimeoutError:
-         channel.unsubscribe("set_workflow_node_output", message_listener)
-         await ably_client.close()
-         raise Exception("Workflow execution did not complete properly")
+         await client.connect()
+         yield client
+     finally:
+         await client.disconnect()

-     # Unsubscribe from the channel and close the client
-     channel.unsubscribe("set_workflow_node_output", message_listener)
-     await ably_client.close()

-     return final_output
+ @asynccontextmanager
+ async def centrifugo_subscription(client: Client, topic: str, message_listener: MessageCallback):
+     subscription = client.new_subscription(
+         topic,
+         events=SubscriptionEventLoggerHandler(message_listener),
+     )
+     try:
+         await subscription.subscribe()
+         yield
+     finally:
+         if subscription.state == SubscriptionState.SUBSCRIBED:
+             await subscription.unsubscribe()
+
+
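
The two `asynccontextmanager` helpers above guarantee that the Centrifugo connection and subscription are torn down even if the body raises. A minimal usage sketch with placeholder address, token, and topic values:

    async def listen_once(address: str, token: str, topic: str):
        received = asyncio.Future()

        async def on_message(message: Message):
            if not received.done():
                received.set_result(message.data)

        # disconnect() and unsubscribe() run in the managers' finally blocks
        async with centrifugo_client(address, token) as client:
            async with centrifugo_subscription(client, topic, on_message):
                return await asyncio.wait_for(received, timeout=30)
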
+ @retry_on_api_error
+ async def arun_workflow_request(
+     *,
+     api_key: str,
+     base_url: str,
+     throw_on_error: bool,
+     workflow_id_or_name: Optional[Union[int, str]] = None,
+     input_variables: Dict[str, Any],
+     metadata: Optional[Dict[str, Any]] = None,
+     workflow_label_name: Optional[str] = None,
+     workflow_version_number: Optional[int] = None,
+     return_all_outputs: Optional[bool] = False,
+     timeout: Optional[int] = 3600,
+     # `workflow_name` deprecated, kept for backward compatibility only.
+     workflow_name: Optional[str] = None,
+ ):
+     headers = {"X-API-KEY": api_key}
+     workflow_id = await _resolve_workflow_id(
+         base_url, _get_workflow_workflow_id_or_name(workflow_id_or_name, workflow_name), headers
+     )
+     channel_name_suffix = _make_channel_name_suffix()
+     channel_name = WORKFLOW_RUN_CHANNEL_NAME_TEMPLATE.format(
+         workflow_id=workflow_id, channel_name_suffix=channel_name_suffix
+     )
+     ably_token = await _get_ably_token(base_url, channel_name, headers)
+     token = ably_token["token_details"]["token"]
+
+     execution_id_future = asyncio.Future[int]()
+
+     if ably_token.get("messaging_backend") == "centrifugo":
+         address = urllib3.util.parse_url(base_url)._replace(scheme="wss", path="/connection/websocket").url
+         async with centrifugo_client(address, token) as client:
+             results_future = asyncio.Future[dict[str, Any]]()
+             async with centrifugo_subscription(
+                 client,
+                 channel_name,
+                 _make_message_listener(base_url, results_future, execution_id_future, return_all_outputs, headers),
+             ):
+                 execution_id = await _post_workflow_id_run(
+                     base_url=base_url,
+                     authentication_headers=headers,
+                     workflow_id=workflow_id,
+                     input_variables=input_variables,
+                     metadata=metadata,
+                     workflow_label_name=workflow_label_name,
+                     workflow_version_number=workflow_version_number,
+                     return_all_outputs=return_all_outputs,
+                     channel_name_suffix=channel_name_suffix,
+                 )
+                 execution_id_future.set_result(execution_id)
+                 await asyncio.wait_for(results_future, timeout)
+                 return results_future.result()
+
+     async with AblyRealtime(token=token) as ably_client:
+         # It is crucial to subscribe before running a workflow, otherwise we may miss a completion message
+         channel = ably_client.channels.get(channel_name)
+         results_future, message_listener = await _subscribe_to_workflow_completion_channel(
+             base_url, channel, execution_id_future, return_all_outputs, headers
+         )
+
+         execution_id = await _post_workflow_id_run(
+             base_url=base_url,
+             authentication_headers=headers,
+             workflow_id=workflow_id,
+             input_variables=input_variables,
+             metadata=metadata,
+             workflow_label_name=workflow_label_name,
+             workflow_version_number=workflow_version_number,
+             return_all_outputs=return_all_outputs,
+             channel_name_suffix=channel_name_suffix,
+         )
+         execution_id_future.set_result(execution_id)
+
+         return await _wait_for_workflow_completion(channel, results_future, message_listener, timeout)


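
A hedged sketch of calling the rewritten entry point; the API key and workflow id below are placeholders, and passing an integer `workflow_id_or_name` skips the extra name-resolution request:

    async def main():
        results = await arun_workflow_request(
            api_key="pl_...",  # placeholder
            base_url="https://api.promptlayer.com",
            throw_on_error=True,
            workflow_id_or_name=123,
            input_variables={"topic": "retries"},
            timeout=3600,  # seconds to wait for the completion message
        )
        print(results)

    # asyncio.run(main())
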
  def promptlayer_api_handler(
+     api_key: str,
+     base_url: str,
      function_name,
      provider_type,
      args,
@@ -149,20 +387,13 @@ def promptlayer_api_handler(
      response,
      request_start_time,
      request_end_time,
-     api_key,
      return_pl_id=False,
      llm_request_span_id=None,
  ):
      if (
          isinstance(response, types.GeneratorType)
          or isinstance(response, types.AsyncGeneratorType)
-         or type(response).__name__
-         in [
-             "Stream",
-             "AsyncStream",
-             "AsyncMessageStreamManager",
-             "MessageStreamManager",
-         ]
+         or type(response).__name__ in ["Stream", "AsyncStream", "AsyncMessageStreamManager", "MessageStreamManager"]
      ):
          return GeneratorProxy(
              generator=response,
@@ -178,9 +409,11 @@ def promptlayer_api_handler(
                  "llm_request_span_id": llm_request_span_id,
              },
              api_key=api_key,
+             base_url=base_url,
          )
      else:
          request_id = promptlayer_api_request(
+             base_url=base_url,
              function_name=function_name,
              provider_type=provider_type,
              args=args,
@@ -199,6 +432,8 @@


  async def promptlayer_api_handler_async(
+     api_key: str,
+     base_url: str,
      function_name,
      provider_type,
      args,
@@ -207,13 +442,14 @@ async def promptlayer_api_handler_async(
      response,
      request_start_time,
      request_end_time,
-     api_key,
      return_pl_id=False,
      llm_request_span_id=None,
  ):
      return await run_in_thread_async(
          None,
          promptlayer_api_handler,
+         api_key,
+         base_url,
          function_name,
          provider_type,
          args,
@@ -222,7 +458,6 @@ async def promptlayer_api_handler_async(
          response,
          request_start_time,
          request_end_time,
-         api_key,
          return_pl_id=return_pl_id,
          llm_request_span_id=llm_request_span_id,
      )
@@ -236,15 +471,13 @@ def convert_native_object_to_dict(native_object):
      if isinstance(native_object, Enum):
          return native_object.value
      if hasattr(native_object, "__dict__"):
-         return {
-             k: convert_native_object_to_dict(v)
-             for k, v in native_object.__dict__.items()
-         }
+         return {k: convert_native_object_to_dict(v) for k, v in native_object.__dict__.items()}
      return native_object


  def promptlayer_api_request(
      *,
+     base_url: str,
      function_name,
      provider_type,
      args,
@@ -261,13 +494,11 @@ def promptlayer_api_request(
      if isinstance(response, dict) and hasattr(response, "to_dict_recursive"):
          response = response.to_dict_recursive()
      request_response = None
-     if hasattr(
-         response, "dict"
-     ):  # added this for anthropic 3.0 changes, they return a completion object
+     if hasattr(response, "dict"):  # added this for anthropic 3.0 changes, they return a completion object
          response = response.dict()
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/track-request",
+             f"{base_url}/track-request",
              json={
                  "function_name": function_name,
                  "provider_type": provider_type,
@@ -284,61 +515,64 @@ def promptlayer_api_request(
          )
          if not hasattr(request_response, "status_code"):
              warn_on_bad_response(
-                 request_response,
-                 "WARNING: While logging your request PromptLayer had the following issue",
+                 request_response, "WARNING: While logging your request PromptLayer had the following issue"
              )
          elif request_response.status_code != 200:
              warn_on_bad_response(
-                 request_response,
-                 "WARNING: While logging your request PromptLayer had the following error",
+                 request_response, "WARNING: While logging your request PromptLayer had the following error"
              )
      except Exception as e:
-         print(
-             f"WARNING: While logging your request PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         logger.warning(f"While logging your request PromptLayer had the following error: {e}")
      if request_response is not None and return_pl_id:
          return request_response.json().get("request_id")


- def track_request(**body):
+ @retry_on_api_error
+ def track_request(base_url: str, throw_on_error: bool, **body):
      try:
          response = requests.post(
-             f"{URL_API_PROMPTLAYER}/track-request",
+             f"{base_url}/track-request",
              json=body,
          )
          if response.status_code != 200:
-             warn_on_bad_response(
-                 response,
-                 f"PromptLayer had the following error while tracking your request: {response.text}",
-             )
+             if throw_on_error:
+                 raise_on_bad_response(response, "PromptLayer had the following error while tracking your request")
+             else:
+                 warn_on_bad_response(
+                     response, f"PromptLayer had the following error while tracking your request: {response.text}"
+                 )
          return response.json()
      except requests.exceptions.RequestException as e:
-         print(
-             f"WARNING: While logging your request PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"PromptLayer had the following error while tracking your request: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While logging your request PromptLayer had the following error: {e}")
          return {}


- async def atrack_request(**body: Any) -> Dict[str, Any]:
+ @retry_on_api_error
+ async def atrack_request(base_url: str, throw_on_error: bool, **body: Any) -> Dict[str, Any]:
      try:
-         async with httpx.AsyncClient() as client:
+         async with _make_httpx_client() as client:
              response = await client.post(
-                 f"{URL_API_PROMPTLAYER}/track-request",
+                 f"{base_url}/track-request",
                  json=body,
              )
-             if response.status_code != 200:
-                 warn_on_bad_response(
-                     response,
-                     f"PromptLayer had the following error while tracking your request: {response.text}",
-                 )
+         if response.status_code != 200:
+             if throw_on_error:
+                 raise_on_bad_response(response, "PromptLayer had the following error while tracking your request")
+             else:
+                 warn_on_bad_response(
+                     response, f"PromptLayer had the following error while tracking your request: {response.text}"
+                 )
          return response.json()
      except httpx.RequestError as e:
-         print(
-             f"WARNING: While logging your request PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"PromptLayer had the following error while tracking your request: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While logging your request PromptLayer had the following error: {e}")
          return {}

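
Most of the configuration introduced at the top of this file is read once at import time; `_get_http_timeout` is the exception and re-reads its variable on every client construction. A sketch of setting the new knobs, assuming the environment variable names shown above:

    import os

    # Set before importing promptlayer.utils; only the timeout is re-read later.
    os.environ["PROMPTLAYER_HTTP_TIMEOUT"] = "10"  # per-request httpx timeout in seconds (default 5)
    os.environ["PROMPTLAYER_RE_RAISE_ORIGINAL_EXCEPTION"] = "true"  # re-raise instead of wrapping
    os.environ["PROMPTLAYER_BASE_URL"] = "https://api.promptlayer.com"  # feeds WS_TOKEN_REQUEST_LIBRARY_URL

    from promptlayer import utils  # noqa: E402
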
@@ -370,8 +604,9 @@ def promptlayer_api_request_async(
      )


+ @retry_on_api_error
  def promptlayer_get_prompt(
-     prompt_name, api_key, version: int = None, label: str = None
+     api_key: str, base_url: str, throw_on_error: bool, prompt_name, version: int = None, label: str = None
  ):
      """
      Get a prompt from the PromptLayer library
@@ -380,29 +615,40 @@ def promptlayer_get_prompt(
      """
      try:
          request_response = requests.get(
-             f"{URL_API_PROMPTLAYER}/library-get-prompt-template",
+             f"{base_url}/library-get-prompt-template",
              headers={"X-API-KEY": api_key},
              params={"prompt_name": prompt_name, "version": version, "label": label},
          )
      except Exception as e:
-         raise Exception(
-             f"PromptLayer had the following error while getting your prompt: {e}"
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIError(
+                 f"PromptLayer had the following error while getting your prompt: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"PromptLayer had the following error while getting your prompt: {e}")
+         return None
      if request_response.status_code != 200:
-         raise_on_bad_response(
-             request_response,
-             "PromptLayer had the following error while getting your prompt",
-         )
+         if throw_on_error:
+             raise_on_bad_response(
+                 request_response,
+                 "PromptLayer had the following error while getting your prompt",
+             )
+         else:
+             warn_on_bad_response(
+                 request_response,
+                 "WARNING: PromptLayer had the following error while getting your prompt",
+             )
+             return None

      return request_response.json()


+ @retry_on_api_error
  def promptlayer_publish_prompt(
-     prompt_name, prompt_template, commit_message, tags, api_key, metadata=None
+     api_key: str, base_url: str, throw_on_error: bool, prompt_name, prompt_template, commit_message, tags, metadata=None
  ):
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/library-publish-prompt-template",
+             f"{base_url}/library-publish-prompt-template",
              json={
                  "prompt_name": prompt_name,
                  "prompt_template": prompt_template,
@@ -413,23 +659,34 @@ def promptlayer_publish_prompt(
              },
          )
      except Exception as e:
-         raise Exception(
-             f"PromptLayer had the following error while publishing your prompt: {e}"
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIError(
+                 f"PromptLayer had the following error while publishing your prompt: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"PromptLayer had the following error while publishing your prompt: {e}")
+         return False
      if request_response.status_code != 200:
-         raise_on_bad_response(
-             request_response,
-             "PromptLayer had the following error while publishing your prompt",
-         )
+         if throw_on_error:
+             raise_on_bad_response(
+                 request_response,
+                 "PromptLayer had the following error while publishing your prompt",
+             )
+         else:
+             warn_on_bad_response(
+                 request_response,
+                 "WARNING: PromptLayer had the following error while publishing your prompt",
+             )
+             return False
      return True


+ @retry_on_api_error
  def promptlayer_track_prompt(
-     request_id, prompt_name, input_variables, api_key, version, label
+     api_key: str, base_url: str, throw_on_error: bool, request_id, prompt_name, input_variables, version, label
  ):
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/library-track-prompt",
+             f"{base_url}/library-track-prompt",
              json={
                  "request_id": request_id,
                  "prompt_name": prompt_name,
@@ -440,29 +697,39 @@ def promptlayer_track_prompt(
              },
          )
          if request_response.status_code != 200:
-             warn_on_bad_response(
-                 request_response,
-                 "WARNING: While tracking your prompt PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     request_response,
+                     "While tracking your prompt PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     request_response,
+                     "WARNING: While tracking your prompt PromptLayer had the following error",
+                 )
+                 return False
      except Exception as e:
-         print(
-             f"WARNING: While tracking your prompt PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIError(
+                 f"While tracking your prompt PromptLayer had the following error: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your prompt PromptLayer had the following error: {e}")
          return False
      return True


+ @retry_on_api_error
  async def apromptlayer_track_prompt(
+     api_key: str,
+     base_url: str,
      request_id: str,
      prompt_name: str,
      input_variables: Dict[str, Any],
-     api_key: Optional[str] = None,
      version: Optional[int] = None,
      label: Optional[str] = None,
+     throw_on_error: bool = True,
  ) -> bool:
-     url = f"{URL_API_PROMPTLAYER}/library-track-prompt"
+     url = f"{base_url}/library-track-prompt"
      payload = {
          "request_id": request_id,
          "prompt_name": prompt_name,
@@ -472,28 +739,34 @@ async def apromptlayer_track_prompt(
          "label": label,
      }
      try:
-         async with httpx.AsyncClient() as client:
+         async with _make_httpx_client() as client:
              response = await client.post(url, json=payload)
+
          if response.status_code != 200:
-             warn_on_bad_response(
-                 response,
-                 "WARNING: While tracking your prompt, PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(response, "While tracking your prompt, PromptLayer had the following error")
+             else:
+                 warn_on_bad_response(
+                     response,
+                     "WARNING: While tracking your prompt, PromptLayer had the following error",
+                 )
+                 return False
      except httpx.RequestError as e:
-         print(
-             f"WARNING: While tracking your prompt PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"While tracking your prompt PromptLayer had the following error: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your prompt PromptLayer had the following error: {e}")
          return False

      return True


- def promptlayer_track_metadata(request_id, metadata, api_key):
+ @retry_on_api_error
+ def promptlayer_track_metadata(api_key: str, base_url: str, throw_on_error: bool, request_id, metadata):
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/library-track-metadata",
+             f"{base_url}/library-track-metadata",
              json={
                  "request_id": request_id,
                  "metadata": metadata,
@@ -501,79 +774,106 @@ def promptlayer_track_metadata(request_id, metadata, api_key):
              },
          )
          if request_response.status_code != 200:
-             warn_on_bad_response(
-                 request_response,
-                 "WARNING: While tracking your metadata PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     request_response,
+                     "While tracking your metadata PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     request_response,
+                     "WARNING: While tracking your metadata PromptLayer had the following error",
+                 )
+                 return False
      except Exception as e:
-         print(
-             f"WARNING: While tracking your metadata PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIError(
+                 f"While tracking your metadata PromptLayer had the following error: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your metadata PromptLayer had the following error: {e}")
          return False
      return True


+ @retry_on_api_error
  async def apromptlayer_track_metadata(
-     request_id: str, metadata: Dict[str, Any], api_key: Optional[str] = None
+     api_key: str, base_url: str, throw_on_error: bool, request_id: str, metadata: Dict[str, Any]
  ) -> bool:
-     url = f"{URL_API_PROMPTLAYER}/library-track-metadata"
+     url = f"{base_url}/library-track-metadata"
      payload = {
          "request_id": request_id,
          "metadata": metadata,
          "api_key": api_key,
      }
      try:
-         async with httpx.AsyncClient() as client:
+         async with _make_httpx_client() as client:
              response = await client.post(url, json=payload)
+
          if response.status_code != 200:
-             warn_on_bad_response(
-                 response,
-                 "WARNING: While tracking your metadata, PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     response,
+                     "While tracking your metadata, PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     response,
+                     "WARNING: While tracking your metadata, PromptLayer had the following error",
+                 )
+                 return False
      except httpx.RequestError as e:
-         print(
-             f"WARNING: While tracking your metadata PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"While tracking your metadata PromptLayer had the following error: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your metadata PromptLayer had the following error: {e}")
          return False

      return True


- def promptlayer_track_score(request_id, score, score_name, api_key):
+ @retry_on_api_error
+ def promptlayer_track_score(api_key: str, base_url: str, throw_on_error: bool, request_id, score, score_name):
      try:
          data = {"request_id": request_id, "score": score, "api_key": api_key}
          if score_name is not None:
              data["name"] = score_name
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/library-track-score",
+             f"{base_url}/library-track-score",
              json=data,
          )
          if request_response.status_code != 200:
-             warn_on_bad_response(
-                 request_response,
-                 "WARNING: While tracking your score PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     request_response,
+                     "While tracking your score PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     request_response,
+                     "WARNING: While tracking your score PromptLayer had the following error",
+                 )
+                 return False
      except Exception as e:
-         print(
-             f"WARNING: While tracking your score PromptLayer had the following error: {e}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIError(
+                 f"While tracking your score PromptLayer had the following error: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your score PromptLayer had the following error: {e}")
          return False
      return True


+ @retry_on_api_error
  async def apromptlayer_track_score(
+     api_key: str,
+     base_url: str,
+     throw_on_error: bool,
      request_id: str,
      score: float,
      score_name: Optional[str],
-     api_key: Optional[str] = None,
  ) -> bool:
-     url = f"{URL_API_PROMPTLAYER}/library-track-score"
+     url = f"{base_url}/library-track-score"
      data = {
          "request_id": request_id,
          "score": score,
@@ -582,30 +882,96 @@ async def apromptlayer_track_score(
      if score_name is not None:
          data["name"] = score_name
      try:
-         async with httpx.AsyncClient() as client:
+         async with _make_httpx_client() as client:
              response = await client.post(url, json=data)
+
          if response.status_code != 200:
-             warn_on_bad_response(
-                 response,
-                 "WARNING: While tracking your score, PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     response,
+                     "While tracking your score, PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     response,
+                     "WARNING: While tracking your score, PromptLayer had the following error",
+                 )
+                 return False
      except httpx.RequestError as e:
-         print(
-             f"WARNING: While tracking your score PromptLayer had the following error: {str(e)}",
-             file=sys.stderr,
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"PromptLayer had the following error while tracking your score: {str(e)}", response=None, body=None
+             ) from e
+         logger.warning(f"While tracking your score PromptLayer had the following error: {str(e)}")
          return False

      return True


+ def build_anthropic_content_blocks(events):
+     content_blocks = []
+     current_block = None
+     current_signature = ""
+     current_thinking = ""
+     current_text = ""
+     current_tool_input_json = ""
+     usage = None
+     stop_reason = None
+
+     for event in events:
+         if event.type == "content_block_start":
+             current_block = deepcopy(event.content_block)
+             if current_block.type == "thinking":
+                 current_signature = ""
+                 current_thinking = ""
+             elif current_block.type == "text":
+                 current_text = ""
+             elif current_block.type == "tool_use":
+                 current_tool_input_json = ""
+         elif event.type == "content_block_delta" and current_block is not None:
+             if current_block.type == "thinking":
+                 if hasattr(event.delta, "signature"):
+                     current_signature = event.delta.signature
+                 if hasattr(event.delta, "thinking"):
+                     current_thinking += event.delta.thinking
+             elif current_block.type == "text":
+                 if hasattr(event.delta, "text"):
+                     current_text += event.delta.text
+             elif current_block.type == "tool_use":
+                 if hasattr(event.delta, "partial_json"):
+                     current_tool_input_json += event.delta.partial_json
+         elif event.type == "content_block_stop" and current_block is not None:
+             if current_block.type == "thinking":
+                 current_block.signature = current_signature
+                 current_block.thinking = current_thinking
+             elif current_block.type == "text":
+                 current_block.text = current_text
+             elif current_block.type == "tool_use":
+                 try:
+                     current_block.input = json.loads(current_tool_input_json)
+                 except json.JSONDecodeError:
+                     current_block.input = {}
+             content_blocks.append(current_block)
+             current_block = None
+             current_signature = ""
+             current_thinking = ""
+             current_text = ""
+             current_tool_input_json = ""
+         elif event.type == "message_delta":
+             if hasattr(event, "usage"):
+                 usage = event.usage
+             if hasattr(event.delta, "stop_reason"):
+                 stop_reason = event.delta.stop_reason
+     return content_blocks, usage, stop_reason
+
+
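
`build_anthropic_content_blocks` relies only on attribute access, so it can be exercised with synthetic events; a sketch using `SimpleNamespace` stand-ins for the Anthropic stream event objects:

    from types import SimpleNamespace as NS

    events = [
        NS(type="content_block_start", content_block=NS(type="text", text="")),
        NS(type="content_block_delta", delta=NS(text="Hel")),
        NS(type="content_block_delta", delta=NS(text="lo")),
        NS(type="content_block_stop"),
        NS(type="message_delta", usage=NS(output_tokens=2), delta=NS(stop_reason="end_turn")),
    ]

    blocks, usage, stop_reason = build_anthropic_content_blocks(events)
    assert blocks[0].text == "Hello"
    assert usage.output_tokens == 2 and stop_reason == "end_turn"
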
  class GeneratorProxy:
-     def __init__(self, generator, api_request_arguments, api_key):
+     def __init__(self, generator, api_request_arguments, api_key, base_url):
          self.generator = generator
          self.results = []
          self.api_request_arugments = api_request_arguments
          self.api_key = api_key
+         self.base_url = base_url

      def __iter__(self):
          return self
@@ -620,6 +986,7 @@ class GeneratorProxy:
              await self.generator._AsyncMessageStreamManager__api_request,
              api_request_arguments,
              self.api_key,
+             self.base_url,
          )

      def __enter__(self):
@@ -630,6 +997,7 @@ class GeneratorProxy:
              stream,
              api_request_arguments,
              self.api_key,
+             self.base_url,
          )

      def __exit__(self, exc_type, exc_val, exc_tb):
@@ -648,9 +1016,7 @@ class GeneratorProxy:

      def __getattr__(self, name):
          if name == "text_stream":  # anthropic async stream
-             return GeneratorProxy(
-                 self.generator.text_stream, self.api_request_arugments, self.api_key
-             )
+             return GeneratorProxy(self.generator.text_stream, self.api_request_arugments, self.api_key, self.base_url)
          return getattr(self.generator, name)

      def _abstracted_next(self, result):
@@ -667,12 +1033,12 @@ class GeneratorProxy:
              end_anthropic = True

          end_openai = provider_type == "openai" and (
-             result.choices[0].finish_reason == "stop"
-             or result.choices[0].finish_reason == "length"
+             result.choices[0].finish_reason == "stop" or result.choices[0].finish_reason == "length"
          )

          if end_anthropic or end_openai:
              request_id = promptlayer_api_request(
+                 base_url=self.base_url,
                  function_name=self.api_request_arugments["function_name"],
                  provider_type=self.api_request_arugments["provider_type"],
                  args=self.api_request_arugments["args"],
@@ -683,9 +1049,7 @@ class GeneratorProxy:
                  request_end_time=self.api_request_arugments["request_end_time"],
                  api_key=self.api_key,
                  return_pl_id=self.api_request_arugments["return_pl_id"],
-                 llm_request_span_id=self.api_request_arugments.get(
-                     "llm_request_span_id"
-                 ),
+                 llm_request_span_id=self.api_request_arugments.get("llm_request_span_id"),
              )

          if self.api_request_arugments["return_pl_id"]:
@@ -702,31 +1066,35 @@ class GeneratorProxy:
          response = ""
          for result in self.results:
              if hasattr(result, "completion"):
-                 response = f"{response}{result.completion}"
+                 response += result.completion
              elif hasattr(result, "message") and isinstance(result.message, str):
-                 response = f"{response}{result.message}"
+                 response += result.message
              elif (
                  hasattr(result, "content_block")
                  and hasattr(result.content_block, "text")
-                 and "type" in result
-                 and result.type != "message_stop"
+                 and getattr(result, "type", None) != "message_stop"
              ):
-                 response = f"{response}{result.content_block.text}"
-             elif hasattr(result, "delta") and hasattr(result.delta, "text"):
-                 response = f"{response}{result.delta.text}"
-         if (
-             hasattr(self.results[-1], "type")
-             and self.results[-1].type == "message_stop"
-         ):  # this is a message stream and not the correct event
+                 response += result.content_block.text
+             elif hasattr(result, "delta"):
+                 if hasattr(result.delta, "thinking"):
+                     response += result.delta.thinking
+                 elif hasattr(result.delta, "text"):
+                     response += result.delta.text
+
+         # 2) If this is a "stream" (ended by message_stop), reconstruct both ThinkingBlock & TextBlock
+         last_event = self.results[-1]
+         if getattr(last_event, "type", None) == "message_stop":
              final_result = deepcopy(self.results[0].message)
-             final_result.usage = None
-             content_block = deepcopy(self.results[1].content_block)
-             content_block.text = response
-             final_result.content = [content_block]
-         else:
-             final_result = deepcopy(self.results[-1])
-             final_result.completion = response
+
+             content_blocks, usage, stop_reason = build_anthropic_content_blocks(self.results)
+             final_result.content = content_blocks
+             if usage:
+                 final_result.usage.output_tokens = usage.output_tokens
+             if stop_reason:
+                 final_result.stop_reason = stop_reason
              return final_result
+         else:
+             return deepcopy(self.results[-1])
          if hasattr(self.results[0].choices[0], "text"):  # this is regular completion
              response = ""
              for result in self.results:
@@ -734,23 +1102,15 @@ class GeneratorProxy:
              final_result = deepcopy(self.results[-1])
              final_result.choices[0].text = response
              return final_result
-         elif hasattr(
-             self.results[0].choices[0], "delta"
-         ):  # this is completion with delta
+         elif hasattr(self.results[0].choices[0], "delta"):  # this is completion with delta
              response = {"role": "", "content": ""}
              for result in self.results:
-                 if (
-                     hasattr(result.choices[0].delta, "role")
-                     and result.choices[0].delta.role is not None
-                 ):
+                 if hasattr(result.choices[0].delta, "role") and result.choices[0].delta.role is not None:
                      response["role"] = result.choices[0].delta.role
-                 if (
-                     hasattr(result.choices[0].delta, "content")
-                     and result.choices[0].delta.content is not None
-                 ):
-                     response["content"] = response[
-                         "content"
-                     ] = f"{response['content']}{result.choices[0].delta.content}"
+                 if hasattr(result.choices[0].delta, "content") and result.choices[0].delta.content is not None:
+                     response["content"] = response["content"] = (
+                         f"{response['content']}{result.choices[0].delta.content}"
+                     )
              final_result = deepcopy(self.results[-1])
              final_result.choices[0] = response
              return final_result
@@ -769,39 +1129,71 @@ async def run_in_thread_async(executor, func, *args, **kwargs):
  def warn_on_bad_response(request_response, main_message):
      if hasattr(request_response, "json"):
          try:
-             print(
-                 f"{main_message}: {request_response.json().get('message')}",
-                 file=sys.stderr,
-             )
+             logger.warning(f"{main_message}: {request_response.json().get('message')}")
          except json.JSONDecodeError:
-             print(
-                 f"{main_message}: {request_response}",
-                 file=sys.stderr,
-             )
+             logger.warning(f"{main_message}: {request_response}")
      else:
-         print(f"{main_message}: {request_response}", file=sys.stderr)
+         logger.warning(f"{main_message}: {request_response}")


  def raise_on_bad_response(request_response, main_message):
+     """Raise an appropriate exception based on the HTTP status code."""
+     status_code = getattr(request_response, "status_code", None)
+
+     body = None
+     error_detail = None
      if hasattr(request_response, "json"):
          try:
-             raise Exception(
-                 f"{main_message}: {request_response.json().get('message') or request_response.json().get('error')}"
-             )
-         except json.JSONDecodeError:
-             raise Exception(f"{main_message}: {request_response}")
+             body = request_response.json()
+             error_detail = body.get("message") or body.get("error") or body.get("detail")
+         except (json.JSONDecodeError, AttributeError):
+             body = getattr(request_response, "text", str(request_response))
+             error_detail = body
+     else:
+         body = str(request_response)
+         error_detail = body
+
+     if error_detail:
+         err_msg = f"{main_message}: {error_detail}"
      else:
-         raise Exception(f"{main_message}: {request_response}")
+         err_msg = main_message
+
+     if status_code == 400:
+         raise _exceptions.PromptLayerBadRequestError(err_msg, response=request_response, body=body)
+
+     if status_code == 401:
+         raise _exceptions.PromptLayerAuthenticationError(err_msg, response=request_response, body=body)
+
+     if status_code == 403:
+         raise _exceptions.PromptLayerPermissionDeniedError(err_msg, response=request_response, body=body)
+
+     if status_code == 404:
+         raise _exceptions.PromptLayerNotFoundError(err_msg, response=request_response, body=body)
+
+     if status_code == 409:
+         raise _exceptions.PromptLayerConflictError(err_msg, response=request_response, body=body)
+
+     if status_code == 422:
+         raise _exceptions.PromptLayerUnprocessableEntityError(err_msg, response=request_response, body=body)
+
+     if status_code == 429:
+         raise _exceptions.PromptLayerRateLimitError(err_msg, response=request_response, body=body)
+
+     if status_code and status_code >= 500:
+         raise _exceptions.PromptLayerInternalServerError(err_msg, response=request_response, body=body)
+
+     raise _exceptions.PromptLayerAPIStatusError(err_msg, response=request_response, body=body)

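
Because `raise_on_bad_response` now maps status codes onto the hierarchy in `promptlayer.exceptions`, callers that pass `throw_on_error=True` can branch on the failure class instead of parsing messages. A sketch, assuming these exception names are importable from the package:

    from promptlayer import exceptions

    try:
        prompt = promptlayer_get_prompt(
            api_key="pl_...",  # placeholder
            base_url="https://api.promptlayer.com",
            throw_on_error=True,
            prompt_name="welcome-email",
        )
    except exceptions.PromptLayerAuthenticationError:
        ...  # 401: bad or missing API key; retrying will not help
    except exceptions.PromptLayerRateLimitError:
        ...  # 429: retry_on_api_error backs off first, re-raising after four attempts
    except exceptions.PromptLayerAPIConnectionError:
        ...  # network-level failure; no HTTP response attached
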
  async def async_wrapper(
+     api_key: str,
+     base_url: str,
      coroutine_obj,
      return_pl_id,
      request_start_time,
      function_name,
      provider_type,
      tags,
-     api_key: str = None,
      llm_request_span_id: str = None,
      tracer=None,
      *args,
@@ -814,6 +1206,8 @@ async def async_wrapper(
          response = await coroutine_obj
          request_end_time = datetime.datetime.now().timestamp()
          result = await promptlayer_api_handler_async(
+             api_key,
+             base_url,
              function_name,
              provider_type,
              args,
@@ -822,7 +1216,6 @@ async def async_wrapper(
              response,
              request_start_time,
              request_end_time,
-             api_key,
              return_pl_id=return_pl_id,
              llm_request_span_id=llm_request_span_id,
          )
@@ -837,54 +1230,75 @@ async def async_wrapper(
          context.detach(token)


- def promptlayer_create_group(api_key: str = None):
+ @retry_on_api_error
+ def promptlayer_create_group(api_key: str, base_url: str, throw_on_error: bool):
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/create-group",
+             f"{base_url}/create-group",
              json={
                  "api_key": api_key,
              },
          )
          if request_response.status_code != 200:
-             warn_on_bad_response(
-                 request_response,
-                 "WARNING: While creating your group PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     request_response,
+                     "While creating your group PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     request_response,
+                     "WARNING: While creating your group PromptLayer had the following error",
+                 )
+                 return False
      except requests.exceptions.RequestException as e:
-         # I'm aiming for a more specific exception catch here
-         raise Exception(
-             f"PromptLayer had the following error while creating your group: {e}"
-         )
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"PromptLayer had the following error while creating your group: {e}", response=None, body=None
+             ) from e
+         logger.warning(f"While creating your group PromptLayer had the following error: {e}")
+         return False
      return request_response.json()["id"]


- async def apromptlayer_create_group(api_key: Optional[str] = None) -> str:
+ @retry_on_api_error
+ async def apromptlayer_create_group(api_key: str, base_url: str, throw_on_error: bool):
      try:
-         async with httpx.AsyncClient() as client:
+         async with _make_httpx_client() as client:
              response = await client.post(
-                 f"{URL_API_PROMPTLAYER}/create-group",
+                 f"{base_url}/create-group",
                  json={
                      "api_key": api_key,
                  },
              )
+
          if response.status_code != 200:
-             warn_on_bad_response(
-                 response,
-                 "WARNING: While creating your group, PromptLayer had the following error",
-             )
-             return False
+             if throw_on_error:
+                 raise_on_bad_response(
+                     response,
+                     "While creating your group, PromptLayer had the following error",
+                 )
+             else:
+                 warn_on_bad_response(
+                     response,
+                     "WARNING: While creating your group, PromptLayer had the following error",
+                 )
+                 return False
          return response.json()["id"]
      except httpx.RequestError as e:
-         raise Exception(
-             f"PromptLayer had the following error while creating your group: {str(e)}"
-         ) from e
+         if throw_on_error:
+             raise _exceptions.PromptLayerAPIConnectionError(
+                 f"PromptLayer had the following error while creating your group: {str(e)}", response=None, body=None
+             ) from e
+         logger.warning(f"While creating your group PromptLayer had the following error: {e}")
+         return False


- def promptlayer_track_group(request_id, group_id, api_key: str = None):
+ @retry_on_api_error
+ def promptlayer_track_group(api_key: str, base_url: str, throw_on_error: bool, request_id, group_id):
      try:
          request_response = requests.post(
-             f"{URL_API_PROMPTLAYER}/track-group",
+             f"{base_url}/track-group",
              json={
                  "api_key": api_key,
                  "request_id": request_id,
@@ -892,118 +1306,170 @@ def promptlayer_track_group(request_id, group_id, api_key: str = None):
892
1306
  },
893
1307
  )
894
1308
  if request_response.status_code != 200:
895
- warn_on_bad_response(
896
- request_response,
897
- "WARNING: While tracking your group PromptLayer had the following error",
898
- )
899
- return False
1309
+ if throw_on_error:
1310
+ raise_on_bad_response(
1311
+ request_response,
1312
+ "While tracking your group PromptLayer had the following error",
1313
+ )
1314
+ else:
1315
+ warn_on_bad_response(
1316
+ request_response,
1317
+ "WARNING: While tracking your group PromptLayer had the following error",
1318
+ )
1319
+ return False
900
1320
  except requests.exceptions.RequestException as e:
901
- # I'm aiming for a more specific exception catch here
902
- raise Exception(
903
- f"PromptLayer had the following error while tracking your group: {e}"
904
- )
1321
+ if throw_on_error:
1322
+ raise _exceptions.PromptLayerAPIConnectionError(
1323
+ f"PromptLayer had the following error while tracking your group: {e}", response=None, body=None
1324
+ ) from e
1325
+ logger.warning(f"While tracking your group PromptLayer had the following error: {e}")
1326
+ return False
905
1327
  return True
906
1328
 
907
1329
 
908
- async def apromptlayer_track_group(request_id, group_id, api_key: str = None):
1330
+ @retry_on_api_error
1331
+ async def apromptlayer_track_group(api_key: str, base_url: str, throw_on_error: bool, request_id, group_id):
909
1332
  try:
910
1333
  payload = {
911
1334
  "api_key": api_key,
912
1335
  "request_id": request_id,
913
1336
  "group_id": group_id,
914
1337
  }
915
- async with httpx.AsyncClient() as client:
1338
+ async with _make_httpx_client() as client:
916
1339
  response = await client.post(
917
- f"{URL_API_PROMPTLAYER}/track-group",
1340
+ f"{base_url}/track-group",
918
1341
  headers={"X-API-KEY": api_key},
919
1342
  json=payload,
920
1343
  )
1344
+
921
1345
  if response.status_code != 200:
922
- warn_on_bad_response(
923
- response,
924
- "WARNING: While tracking your group, PromptLayer had the following error",
925
- )
926
- return False
1346
+ if throw_on_error:
1347
+ raise_on_bad_response(
1348
+ response,
1349
+ "While tracking your group, PromptLayer had the following error",
1350
+ )
1351
+ else:
1352
+ warn_on_bad_response(
1353
+ response,
1354
+ "WARNING: While tracking your group, PromptLayer had the following error",
1355
+ )
1356
+ return False
927
1357
  except httpx.RequestError as e:
928
- print(
929
- f"WARNING: While tracking your group PromptLayer had the following error: {e}",
930
- file=sys.stderr,
931
- )
1358
+ if throw_on_error:
1359
+ raise _exceptions.PromptLayerAPIConnectionError(
1360
+ f"PromptLayer had the following error while tracking your group: {str(e)}", response=None, body=None
1361
+ ) from e
1362
+ logger.warning(f"While tracking your group PromptLayer had the following error: {e}")
932
1363
  return False
933
1364
 
934
1365
  return True
935
1366
 
936
1367
 
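Both track-group helpers now take configuration first (api_key, base_url, throw_on_error) and the identifiers last, and both are wrapped in retry_on_api_error. A usage sketch with hypothetical identifiers:

from promptlayer.utils import promptlayer_track_group

ok = promptlayer_track_group(
    "pl_xxx",                       # api_key
    "https://api.promptlayer.com",  # base_url
    False,                          # throw_on_error: warn and return False instead of raising
    12345,                          # request_id from a previous log call (hypothetical)
    "group-uuid",                   # group_id (hypothetical)
)
if not ok:
    print("tracking failed; details were logged as a warning")
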
1368
+ @retry_on_api_error
937
1369
  def get_prompt_template(
938
- prompt_name: str, params: Union[GetPromptTemplate, None] = None, api_key: str = None
1370
+ api_key: str, base_url: str, throw_on_error: bool, prompt_name: str, params: Union[GetPromptTemplate, None] = None
939
1371
  ) -> GetPromptTemplateResponse:
940
1372
  try:
941
1373
  json_body = {"api_key": api_key}
942
1374
  if params:
943
1375
  json_body = {**json_body, **params}
944
1376
  response = requests.post(
945
- f"{URL_API_PROMPTLAYER}/prompt-templates/{prompt_name}",
1377
+ f"{base_url}/prompt-templates/{prompt_name}",
946
1378
  headers={"X-API-KEY": api_key},
947
1379
  json=json_body,
948
1380
  )
949
1381
  if response.status_code != 200:
950
- raise Exception(
951
- f"PromptLayer had the following error while getting your prompt template: {response.text}"
952
- )
1382
+ if throw_on_error:
1383
+ raise_on_bad_response(
1384
+ response, "PromptLayer had the following error while getting your prompt template"
1385
+ )
1386
+ else:
1387
+ warn_on_bad_response(
1388
+ response, "WARNING: PromptLayer had the following error while getting your prompt template"
1389
+ )
1390
+ return None
953
1391
 
954
- warning = response.json().get("warning", None)
955
- if warning is not None:
956
- warn_on_bad_response(
957
- warning,
958
- "WARNING: While getting your prompt template",
959
- )
960
1392
  return response.json()
1393
+ except requests.exceptions.ConnectionError as e:
1394
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {e}"
1395
+ if throw_on_error:
1396
+ raise _exceptions.PromptLayerAPIConnectionError(err_msg, response=None, body=None) from e
1397
+ logger.warning(err_msg)
1398
+ return None
1399
+ except requests.exceptions.Timeout as e:
1400
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {e}"
1401
+ if throw_on_error:
1402
+ raise _exceptions.PromptLayerAPITimeoutError(err_msg, response=None, body=None) from e
1403
+ logger.warning(err_msg)
1404
+ return None
961
1405
  except requests.exceptions.RequestException as e:
962
- raise Exception(
963
- f"PromptLayer had the following error while getting your prompt template: {e}"
964
- )
1406
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {e}"
1407
+ if throw_on_error:
1408
+ raise _exceptions.PromptLayerError(err_msg, response=None, body=None) from e
1409
+ logger.warning(err_msg)
1410
+ return None
965
1411
 
966
1412
 
1413
+ @retry_on_api_error
967
1414
  async def aget_prompt_template(
1415
+ api_key: str,
1416
+ base_url: str,
1417
+ throw_on_error: bool,
968
1418
  prompt_name: str,
969
1419
  params: Union[GetPromptTemplate, None] = None,
970
- api_key: str = None,
971
1420
  ) -> GetPromptTemplateResponse:
972
1421
  try:
973
1422
  json_body = {"api_key": api_key}
974
1423
  if params:
975
1424
  json_body.update(params)
976
- async with httpx.AsyncClient() as client:
1425
+ async with _make_httpx_client() as client:
977
1426
  response = await client.post(
978
- f"{URL_API_PROMPTLAYER}/prompt-templates/{prompt_name}",
1427
+ f"{base_url}/prompt-templates/{quote(prompt_name, safe='')}",
979
1428
  headers={"X-API-KEY": api_key},
980
1429
  json=json_body,
981
1430
  )
982
1431
  if response.status_code != 200:
983
- raise_on_bad_response(
984
- response,
985
- "PromptLayer had the following error while getting your prompt template",
986
- )
987
- warning = response.json().get("warning", None)
988
- if warning:
989
- warn_on_bad_response(
990
- warning,
991
- "WARNING: While getting your prompt template",
992
- )
1432
+ if throw_on_error:
1433
+ raise_on_bad_response(
1434
+ response,
1435
+ "PromptLayer had the following error while getting your prompt template",
1436
+ )
1437
+ else:
1438
+ warn_on_bad_response(
1439
+ response, "WARNING: While getting your prompt template PromptLayer had the following error"
1440
+ )
1441
+ return None
993
1442
  return response.json()
1443
+ except (httpx.ConnectError, httpx.NetworkError) as e:
1444
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {str(e)}"
1445
+ if throw_on_error:
1446
+ raise _exceptions.PromptLayerAPIConnectionError(err_msg, response=None, body=None) from e
1447
+ logger.warning(err_msg)
1448
+ return None
1449
+ except httpx.TimeoutException as e:
1450
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {str(e)}"
1451
+ if throw_on_error:
1452
+ raise _exceptions.PromptLayerAPITimeoutError(err_msg, response=None, body=None) from e
1453
+ logger.warning(err_msg)
1454
+ return None
994
1455
  except httpx.RequestError as e:
995
- raise Exception(
996
- f"PromptLayer had the following error while getting your prompt template: {str(e)}"
997
- ) from e
1456
+ err_msg = f"PromptLayer had the following error while getting your prompt template: {str(e)}"
1457
+ if throw_on_error:
1458
+ raise _exceptions.PromptLayerAPIConnectionError(err_msg, response=None, body=None) from e
1459
+ logger.warning(err_msg)
1460
+ return None
998
1461
 
999
1462
 
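Note the asymmetry introduced here: the async variant percent-encodes prompt_name with quote(prompt_name, safe=''), while the sync variant still interpolates the raw name into the URL. A usage sketch for the sync helper, with hypothetical values; the params dict follows the GetPromptTemplate type, whose exact fields are not shown in this diff:

from promptlayer.utils import get_prompt_template

template = get_prompt_template(
    "pl_xxx",                       # api_key
    "https://api.promptlayer.com",  # base_url
    True,                           # throw_on_error: raise typed errors instead of returning None
    "welcome-email",                # prompt_name (hypothetical)
    {"label": "prod"},              # hypothetical GetPromptTemplate params
)
if template is not None:
    print(template["prompt_template"]["type"])
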
1463
+ @retry_on_api_error
1000
1464
  def publish_prompt_template(
1465
+ api_key: str,
1466
+ base_url: str,
1467
+ throw_on_error: bool,
1001
1468
  body: PublishPromptTemplate,
1002
- api_key: str = None,
1003
1469
  ) -> PublishPromptTemplateResponse:
1004
1470
  try:
1005
1471
  response = requests.post(
1006
- f"{URL_API_PROMPTLAYER}/rest/prompt-templates",
1472
+ f"{base_url}/rest/prompt-templates",
1007
1473
  headers={"X-API-KEY": api_key},
1008
1474
  json={
1009
1475
  "prompt_template": {**body},
@@ -1012,24 +1478,38 @@ def publish_prompt_template(
1012
1478
  },
1013
1479
  )
1014
1480
  if response.status_code == 400:
1015
- raise Exception(
1016
- f"PromptLayer had the following error while publishing your prompt template: {response.text}"
1017
- )
1481
+ if throw_on_error:
1482
+ raise_on_bad_response(
1483
+ response, "PromptLayer had the following error while publishing your prompt template"
1484
+ )
1485
+ else:
1486
+ warn_on_bad_response(
1487
+ response, "WARNING: PromptLayer had the following error while publishing your prompt template"
1488
+ )
1489
+ return None
1018
1490
  return response.json()
1019
1491
  except requests.exceptions.RequestException as e:
1020
- raise Exception(
1021
- f"PromptLayer had the following error while publishing your prompt template: {e}"
1022
- )
1492
+ if throw_on_error:
1493
+ raise _exceptions.PromptLayerAPIConnectionError(
1494
+ f"PromptLayer had the following error while publishing your prompt template: {e}",
1495
+ response=None,
1496
+ body=None,
1497
+ ) from e
1498
+ logger.warning(f"PromptLayer had the following error while publishing your prompt template: {e}")
1499
+ return None
1023
1500
 
1024
1501
 
1502
+ @retry_on_api_error
1025
1503
  async def apublish_prompt_template(
1504
+ api_key: str,
1505
+ base_url: str,
1506
+ throw_on_error: bool,
1026
1507
  body: PublishPromptTemplate,
1027
- api_key: str = None,
1028
1508
  ) -> PublishPromptTemplateResponse:
1029
1509
  try:
1030
- async with httpx.AsyncClient() as client:
1510
+ async with _make_httpx_client() as client:
1031
1511
  response = await client.post(
1032
- f"{URL_API_PROMPTLAYER}/rest/prompt-templates",
1512
+ f"{base_url}/rest/prompt-templates",
1033
1513
  headers={"X-API-KEY": api_key},
1034
1514
  json={
1035
1515
  "prompt_template": {**body},
@@ -1037,429 +1517,103 @@ async def apublish_prompt_template(
1037
1517
  "release_labels": body.get("release_labels"),
1038
1518
  },
1039
1519
  )
1040
- if response.status_code == 400:
1041
- raise Exception(
1042
- f"PromptLayer had the following error while publishing your prompt template: {response.text}"
1043
- )
1044
- if response.status_code != 201:
1045
- raise_on_bad_response(
1046
- response,
1047
- "PromptLayer had the following error while publishing your prompt template",
1048
- )
1520
+
1521
+ if response.status_code != 201:  # any non-201 status, including 400, fails this check
1522
+ if throw_on_error:
1523
+ raise_on_bad_response(
1524
+ response,
1525
+ "PromptLayer had the following error while publishing your prompt template",
1526
+ )
1527
+ else:
1528
+ warn_on_bad_response(
1529
+ response, "WARNING: PromptLayer had the following error while publishing your prompt template"
1530
+ )
1531
+ return None
1049
1532
  return response.json()
1050
1533
  except httpx.RequestError as e:
1051
- raise Exception(
1052
- f"PromptLayer had the following error while publishing your prompt template: {str(e)}"
1053
- ) from e
1534
+ if throw_on_error:
1535
+ raise _exceptions.PromptLayerAPIConnectionError(
1536
+ f"PromptLayer had the following error while publishing your prompt template: {str(e)}",
1537
+ response=None,
1538
+ body=None,
1539
+ ) from e
1540
+ logger.warning(f"PromptLayer had the following error while publishing your prompt template: {e}")
1541
+ return None
1054
1542
 
1055
1543
 
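Publishing nests the entire body under prompt_template in the outgoing JSON and additionally lifts release_labels out of it. A sketch under the new signature; the body field names are assumptions, since the PublishPromptTemplate schema is not shown in this diff:

from promptlayer.utils import publish_prompt_template

body = {
    "prompt_name": "welcome-email",                       # hypothetical
    "prompt_template": {"type": "chat", "messages": []},  # hypothetical minimal template
    "release_labels": ["prod"],
}
result = publish_prompt_template("pl_xxx", "https://api.promptlayer.com", True, body)
if result is not None:
    print(result)
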
1544
+ @retry_on_api_error
1056
1545
  def get_all_prompt_templates(
1057
- page: int = 1, per_page: int = 30, api_key: str = None
1546
+ api_key: str, base_url: str, throw_on_error: bool, page: int = 1, per_page: int = 30, label: str = None
1058
1547
  ) -> List[ListPromptTemplateResponse]:
1059
1548
  try:
1549
+ params = {"page": page, "per_page": per_page}
1550
+ if label:
1551
+ params["label"] = label
1060
1552
  response = requests.get(
1061
- f"{URL_API_PROMPTLAYER}/prompt-templates",
1553
+ f"{base_url}/prompt-templates",
1062
1554
  headers={"X-API-KEY": api_key},
1063
- params={"page": page, "per_page": per_page},
1555
+ params=params,
1064
1556
  )
1065
1557
  if response.status_code != 200:
1066
- raise Exception(
1067
- f"PromptLayer had the following error while getting all your prompt templates: {response.text}"
1068
- )
1558
+ if throw_on_error:
1559
+ raise_on_bad_response(
1560
+ response, "PromptLayer had the following error while getting all your prompt templates"
1561
+ )
1562
+ else:
1563
+ warn_on_bad_response(
1564
+ response, "WARNING: PromptLayer had the following error while getting all your prompt templates"
1565
+ )
1566
+ return []
1069
1567
  items = response.json().get("items", [])
1070
1568
  return items
1071
1569
  except requests.exceptions.RequestException as e:
1072
- raise Exception(
1073
- f"PromptLayer had the following error while getting all your prompt templates: {e}"
1074
- )
1570
+ if throw_on_error:
1571
+ raise _exceptions.PromptLayerAPIConnectionError(
1572
+ f"PromptLayer had the following error while getting all your prompt templates: {e}",
1573
+ response=None,
1574
+ body=None,
1575
+ ) from e
1576
+ logger.warning(f"PromptLayer had the following error while getting all your prompt templates: {e}")
1577
+ return []
1075
1578
 
1076
1579
 
1580
+ @retry_on_api_error
1077
1581
  async def aget_all_prompt_templates(
1078
- page: int = 1, per_page: int = 30, api_key: str = None
1582
+ api_key: str, base_url: str, throw_on_error: bool, page: int = 1, per_page: int = 30, label: str = None
1079
1583
  ) -> List[ListPromptTemplateResponse]:
1080
1584
  try:
1081
- async with httpx.AsyncClient() as client:
1585
+ params = {"page": page, "per_page": per_page}
1586
+ if label:
1587
+ params["label"] = label
1588
+ async with _make_httpx_client() as client:
1082
1589
  response = await client.get(
1083
- f"{URL_API_PROMPTLAYER}/prompt-templates",
1590
+ f"{base_url}/prompt-templates",
1084
1591
  headers={"X-API-KEY": api_key},
1085
- params={"page": page, "per_page": per_page},
1592
+ params=params,
1086
1593
  )
1594
+
1087
1595
  if response.status_code != 200:
1088
- raise_on_bad_response(
1089
- response,
1090
- "PromptLayer had the following error while getting all your prompt templates",
1091
- )
1596
+ if throw_on_error:
1597
+ raise_on_bad_response(
1598
+ response,
1599
+ "PromptLayer had the following error while getting all your prompt templates",
1600
+ )
1601
+ else:
1602
+ warn_on_bad_response(
1603
+ response, "WARNING: PromptLayer had the following error while getting all your prompt templates"
1604
+ )
1605
+ return []
1092
1606
  items = response.json().get("items", [])
1093
1607
  return items
1094
1608
  except httpx.RequestError as e:
1095
- raise Exception(
1096
- f"PromptLayer had the following error while getting all your prompt templates: {str(e)}"
1097
- ) from e
1098
-
1099
-
1100
- def openai_stream_chat(results: list):
1101
- from openai.types.chat import (
1102
- ChatCompletion,
1103
- ChatCompletionChunk,
1104
- ChatCompletionMessage,
1105
- ChatCompletionMessageToolCall,
1106
- )
1107
- from openai.types.chat.chat_completion import Choice
1108
- from openai.types.chat.chat_completion_message_tool_call import Function
1109
-
1110
- chat_completion_chunks: List[ChatCompletionChunk] = results
1111
- response: ChatCompletion = ChatCompletion(
1112
- id="",
1113
- object="chat.completion",
1114
- choices=[
1115
- Choice(
1116
- finish_reason="stop",
1117
- index=0,
1118
- message=ChatCompletionMessage(role="assistant"),
1119
- )
1120
- ],
1121
- created=0,
1122
- model="",
1123
- )
1124
- last_result = chat_completion_chunks[-1]
1125
- response.id = last_result.id
1126
- response.created = last_result.created
1127
- response.model = last_result.model
1128
- response.system_fingerprint = last_result.system_fingerprint
1129
- response.usage = last_result.usage
1130
- content = ""
1131
- tool_calls: Union[List[ChatCompletionMessageToolCall], None] = None
1132
- for result in chat_completion_chunks:
1133
- choices = result.choices
1134
- if len(choices) == 0:
1135
- continue
1136
- if choices[0].delta.content:
1137
- content = f"{content}{result.choices[0].delta.content}"
1138
-
1139
- delta = choices[0].delta
1140
- if delta.tool_calls:
1141
- tool_calls = tool_calls or []
1142
- last_tool_call = None
1143
- if len(tool_calls) > 0:
1144
- last_tool_call = tool_calls[-1]
1145
- tool_call = delta.tool_calls[0]
1146
- if not tool_call.function:
1147
- continue
1148
- if not last_tool_call or tool_call.id:
1149
- tool_calls.append(
1150
- ChatCompletionMessageToolCall(
1151
- id=tool_call.id or "",
1152
- function=Function(
1153
- name=tool_call.function.name or "",
1154
- arguments=tool_call.function.arguments or "",
1155
- ),
1156
- type=tool_call.type or "function",
1157
- )
1158
- )
1159
- continue
1160
- last_tool_call.function.name = (
1161
- f"{last_tool_call.function.name}{tool_call.function.name or ''}"
1162
- )
1163
- last_tool_call.function.arguments = f"{last_tool_call.function.arguments}{tool_call.function.arguments or ''}"
1164
-
1165
- response.choices[0].message.content = content
1166
- response.choices[0].message.tool_calls = tool_calls
1167
- return response
1168
-
1169
-
1170
- async def aopenai_stream_chat(generator: AsyncIterable[Any]) -> Any:
1171
- from openai.types.chat import (
1172
- ChatCompletion,
1173
- ChatCompletionChunk,
1174
- ChatCompletionMessage,
1175
- ChatCompletionMessageToolCall,
1176
- )
1177
- from openai.types.chat.chat_completion import Choice
1178
- from openai.types.chat.chat_completion_message_tool_call import Function
1179
-
1180
- chat_completion_chunks: List[ChatCompletionChunk] = []
1181
- response: ChatCompletion = ChatCompletion(
1182
- id="",
1183
- object="chat.completion",
1184
- choices=[
1185
- Choice(
1186
- finish_reason="stop",
1187
- index=0,
1188
- message=ChatCompletionMessage(role="assistant"),
1189
- )
1190
- ],
1191
- created=0,
1192
- model="",
1193
- )
1194
- content = ""
1195
- tool_calls: Union[List[ChatCompletionMessageToolCall], None] = None
1196
-
1197
- async for result in generator:
1198
- chat_completion_chunks.append(result)
1199
- choices = result.choices
1200
- if len(choices) == 0:
1201
- continue
1202
- if choices[0].delta.content:
1203
- content = f"{content}{choices[0].delta.content}"
1204
-
1205
- delta = choices[0].delta
1206
- if delta.tool_calls:
1207
- tool_calls = tool_calls or []
1208
- last_tool_call = None
1209
- if len(tool_calls) > 0:
1210
- last_tool_call = tool_calls[-1]
1211
- tool_call = delta.tool_calls[0]
1212
- if not tool_call.function:
1213
- continue
1214
- if not last_tool_call or tool_call.id:
1215
- tool_calls.append(
1216
- ChatCompletionMessageToolCall(
1217
- id=tool_call.id or "",
1218
- function=Function(
1219
- name=tool_call.function.name or "",
1220
- arguments=tool_call.function.arguments or "",
1221
- ),
1222
- type=tool_call.type or "function",
1223
- )
1224
- )
1225
- continue
1226
- last_tool_call.function.name = (
1227
- f"{last_tool_call.function.name}{tool_call.function.name or ''}"
1228
- )
1229
- last_tool_call.function.arguments = f"{last_tool_call.function.arguments}{tool_call.function.arguments or ''}"
1230
-
1231
- # After collecting all chunks, set the response attributes
1232
- if chat_completion_chunks:
1233
- last_result = chat_completion_chunks[-1]
1234
- response.id = last_result.id
1235
- response.created = last_result.created
1236
- response.model = last_result.model
1237
- response.system_fingerprint = getattr(last_result, "system_fingerprint", None)
1238
- response.usage = last_result.usage
1239
-
1240
- response.choices[0].message.content = content
1241
- response.choices[0].message.tool_calls = tool_calls
1242
- return response
1243
-
1244
-
1245
- def openai_stream_completion(results: list):
1246
- from openai.types.completion import Completion, CompletionChoice
1247
-
1248
- completions: List[Completion] = results
1249
- last_chunk = completions[-1]
1250
- response = Completion(
1251
- id=last_chunk.id,
1252
- created=last_chunk.created,
1253
- model=last_chunk.model,
1254
- object="text_completion",
1255
- choices=[CompletionChoice(finish_reason="stop", index=0, text="")],
1256
- )
1257
- text = ""
1258
- for completion in completions:
1259
- usage = completion.usage
1260
- system_fingerprint = completion.system_fingerprint
1261
- if len(completion.choices) > 0 and completion.choices[0].text:
1262
- text = f"{text}{completion.choices[0].text}"
1263
- if usage:
1264
- response.usage = usage
1265
- if system_fingerprint:
1266
- response.system_fingerprint = system_fingerprint
1267
- response.choices[0].text = text
1268
- return response
1269
-
1270
-
1271
- async def aopenai_stream_completion(generator: AsyncIterable[Any]) -> Any:
1272
- from openai.types.completion import Completion, CompletionChoice
1273
-
1274
- completions: List[Completion] = []
1275
- text = ""
1276
- response = Completion(
1277
- id="",
1278
- created=0,
1279
- model="",
1280
- object="text_completion",
1281
- choices=[CompletionChoice(finish_reason="stop", index=0, text="")],
1282
- )
1283
-
1284
- async for completion in generator:
1285
- completions.append(completion)
1286
- usage = completion.usage
1287
- system_fingerprint = getattr(completion, "system_fingerprint", None)
1288
- if len(completion.choices) > 0 and completion.choices[0].text:
1289
- text = f"{text}{completion.choices[0].text}"
1290
- if usage:
1291
- response.usage = usage
1292
- if system_fingerprint:
1293
- response.system_fingerprint = system_fingerprint
1294
-
1295
- # After collecting all completions, set the response attributes
1296
- if completions:
1297
- last_chunk = completions[-1]
1298
- response.id = last_chunk.id
1299
- response.created = last_chunk.created
1300
- response.model = last_chunk.model
1301
-
1302
- response.choices[0].text = text
1303
- return response
1304
-
1305
-
1306
- def anthropic_stream_message(results: list):
1307
- from anthropic.types import Message, MessageStreamEvent, TextBlock, Usage
1308
-
1309
- message_stream_events: List[MessageStreamEvent] = results
1310
- response: Message = Message(
1311
- id="",
1312
- model="",
1313
- content=[],
1314
- role="assistant",
1315
- type="message",
1316
- stop_reason="stop_sequence",
1317
- stop_sequence=None,
1318
- usage=Usage(input_tokens=0, output_tokens=0),
1319
- )
1320
- content = ""
1321
- for result in message_stream_events:
1322
- if result.type == "message_start":
1323
- response = result.message
1324
- elif result.type == "content_block_delta":
1325
- if result.delta.type == "text_delta":
1326
- content = f"{content}{result.delta.text}"
1327
- elif result.type == "message_delta":
1328
- if hasattr(result, "usage"):
1329
- response.usage.output_tokens = result.usage.output_tokens
1330
- if hasattr(result.delta, "stop_reason"):
1331
- response.stop_reason = result.delta.stop_reason
1332
- response.content.append(TextBlock(type="text", text=content))
1333
- return response
1334
-
1335
-
1336
- async def aanthropic_stream_message(generator: AsyncIterable[Any]) -> Any:
1337
- from anthropic.types import Message, MessageStreamEvent, TextBlock, Usage
1338
-
1339
- message_stream_events: List[MessageStreamEvent] = []
1340
- response: Message = Message(
1341
- id="",
1342
- model="",
1343
- content=[],
1344
- role="assistant",
1345
- type="message",
1346
- stop_reason="stop_sequence",
1347
- stop_sequence=None,
1348
- usage=Usage(input_tokens=0, output_tokens=0),
1349
- )
1350
- content = ""
1351
-
1352
- async for result in generator:
1353
- message_stream_events.append(result)
1354
- if result.type == "message_start":
1355
- response = result.message
1356
- elif result.type == "content_block_delta":
1357
- if result.delta.type == "text_delta":
1358
- content = f"{content}{result.delta.text}"
1359
- elif result.type == "message_delta":
1360
- if hasattr(result, "usage"):
1361
- response.usage.output_tokens = result.usage.output_tokens
1362
- if hasattr(result.delta, "stop_reason"):
1363
- response.stop_reason = result.delta.stop_reason
1364
-
1365
- response.content.append(TextBlock(type="text", text=content))
1366
- return response
1367
-
1368
-
1369
- def anthropic_stream_completion(results: list):
1370
- from anthropic.types import Completion
1371
-
1372
- completions: List[Completion] = results
1373
- last_chunk = completions[-1]
1374
- response = Completion(
1375
- id=last_chunk.id,
1376
- completion="",
1377
- model=last_chunk.model,
1378
- stop_reason="stop",
1379
- type="completion",
1380
- )
1381
-
1382
- text = ""
1383
- for completion in completions:
1384
- text = f"{text}{completion.completion}"
1385
- response.completion = text
1386
- return response
1387
-
1388
-
1389
- async def aanthropic_stream_completion(generator: AsyncIterable[Any]) -> Any:
1390
- from anthropic.types import Completion
1391
-
1392
- completions: List[Completion] = []
1393
- text = ""
1394
- response = Completion(
1395
- id="",
1396
- completion="",
1397
- model="",
1398
- stop_reason="stop",
1399
- type="completion",
1400
- )
1401
-
1402
- async for completion in generator:
1403
- completions.append(completion)
1404
- text = f"{text}{completion.completion}"
1405
-
1406
- # After collecting all completions, set the response attributes
1407
- if completions:
1408
- last_chunk = completions[-1]
1409
- response.id = last_chunk.id
1410
- response.model = last_chunk.model
1411
-
1412
- response.completion = text
1413
- return response
1414
-
1415
-
1416
- def stream_response(
1417
- generator: Generator, after_stream: Callable, map_results: Callable
1418
- ):
1419
- data = {
1420
- "request_id": None,
1421
- "raw_response": None,
1422
- "prompt_blueprint": None,
1423
- }
1424
- results = []
1425
- for result in generator:
1426
- results.append(result)
1427
- data["raw_response"] = result
1428
- yield data
1429
- request_response = map_results(results)
1430
- response = after_stream(request_response=request_response.model_dump())
1431
- data["request_id"] = response.get("request_id")
1432
- data["prompt_blueprint"] = response.get("prompt_blueprint")
1433
- yield data
1434
-
1435
-
1436
- async def astream_response(
1437
- generator: AsyncIterable[Any],
1438
- after_stream: Callable[..., Any],
1439
- map_results: Callable[[Any], Any],
1440
- ) -> AsyncGenerator[Dict[str, Any], None]:
1441
- data = {
1442
- "request_id": None,
1443
- "raw_response": None,
1444
- "prompt_blueprint": None,
1445
- }
1446
- results = []
1447
- async for result in generator:
1448
- results.append(result)
1449
- data["raw_response"] = result
1450
- yield data
1451
-
1452
- async def async_generator_from_list(lst):
1453
- for item in lst:
1454
- yield item
1455
-
1456
- request_response = await map_results(async_generator_from_list(results))
1457
- after_stream_response = await after_stream(
1458
- request_response=request_response.model_dump()
1459
- )
1460
- data["request_id"] = after_stream_response.get("request_id")
1461
- data["prompt_blueprint"] = after_stream_response.get("prompt_blueprint")
1462
- yield data
1609
+ if throw_on_error:
1610
+ raise _exceptions.PromptLayerAPIConnectionError(
1611
+ f"PromptLayer had the following error while getting all your prompt templates: {str(e)}",
1612
+ response=None,
1613
+ body=None,
1614
+ ) from e
1615
+ logger.warning(f"PromptLayer had the following error while getting all your prompt templates: {e}")
1616
+ return []
1463
1617
 
1464
1618
 
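Both list helpers gained an optional label filter that is added to the query string only when set, and both now degrade to an empty list instead of raising when throw_on_error is False. A sketch with hypothetical values:

from promptlayer.utils import get_all_prompt_templates

templates = get_all_prompt_templates(
    "pl_xxx",                       # api_key
    "https://api.promptlayer.com",  # base_url
    False,                          # throw_on_error: return [] on failure
    page=1,
    per_page=30,
    label="prod",                   # hypothetical release label; omitted when None
)
print(f"fetched {len(templates)} templates")
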
1465
1619
  def openai_chat_request(client, **kwargs):
@@ -1476,14 +1630,20 @@ MAP_TYPE_TO_OPENAI_FUNCTION = {
1476
1630
  }
1477
1631
 
1478
1632
 
1479
- def openai_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1633
+ def openai_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1480
1634
  from openai import OpenAI
1481
1635
 
1482
- client = OpenAI(base_url=kwargs.pop("base_url", None))
1483
- request_to_make = MAP_TYPE_TO_OPENAI_FUNCTION[
1484
- prompt_blueprint["prompt_template"]["type"]
1485
- ]
1486
- return request_to_make(client, **kwargs)
1636
+ client = OpenAI(**client_kwargs)
1637
+ api_type = prompt_blueprint["metadata"]["model"].get("api_type", "chat-completions")
1638
+
1639
+ if api_type is None:
1640
+ api_type = "chat-completions"
1641
+
1642
+ if api_type == "chat-completions":
1643
+ request_to_make = MAP_TYPE_TO_OPENAI_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1644
+ return request_to_make(client, **function_kwargs)
1645
+ else:
1646
+ return client.responses.create(**function_kwargs)
1487
1647
 
1488
1648
 
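openai_request now separates client construction from the call: client_kwargs goes to OpenAI(**client_kwargs), function_kwargs to the chosen method, and metadata.model.api_type routes between the Chat Completions path and client.responses.create. A minimal sketch; the blueprint literal is a hypothetical stand-in trimmed to the keys the function reads, and OPENAI_API_KEY is assumed to be set in the environment:

from promptlayer.utils import openai_request

blueprint = {
    "prompt_template": {"type": "chat"},
    "metadata": {"model": {"api_type": "chat-completions"}},
}

result = openai_request(
    blueprint,
    client_kwargs={"base_url": None},  # forwarded to OpenAI(**client_kwargs)
    function_kwargs={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
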
1489
1649
  async def aopenai_chat_request(client, **kwargs):
@@ -1500,34 +1660,45 @@ AMAP_TYPE_TO_OPENAI_FUNCTION = {
1500
1660
  }
1501
1661
 
1502
1662
 
1503
- async def aopenai_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1663
+ async def aopenai_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1504
1664
  from openai import AsyncOpenAI
1505
1665
 
1506
- client = AsyncOpenAI(base_url=kwargs.pop("base_url", None))
1507
- request_to_make = AMAP_TYPE_TO_OPENAI_FUNCTION[
1508
- prompt_blueprint["prompt_template"]["type"]
1509
- ]
1510
- return await request_to_make(client, **kwargs)
1666
+ client = AsyncOpenAI(**client_kwargs)
1667
+ api_type = prompt_blueprint["metadata"]["model"].get("api_type", "chat-completions")
1668
+
1669
+ if api_type == "chat-completions":
1670
+ request_to_make = AMAP_TYPE_TO_OPENAI_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1671
+ return await request_to_make(client, **function_kwargs)
1672
+ else:
1673
+ return await client.responses.create(**function_kwargs)
1511
1674
 
1512
1675
 
1513
- def azure_openai_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1676
+ def azure_openai_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1514
1677
  from openai import AzureOpenAI
1515
1678
 
1516
- client = AzureOpenAI(azure_endpoint=kwargs.pop("base_url", None))
1517
- request_to_make = MAP_TYPE_TO_OPENAI_FUNCTION[
1518
- prompt_blueprint["prompt_template"]["type"]
1519
- ]
1520
- return request_to_make(client, **kwargs)
1679
+ client = AzureOpenAI(azure_endpoint=client_kwargs.pop("base_url", None))
1680
+ api_type = prompt_blueprint["metadata"]["model"].get("api_type", "chat-completions")
1681
+
1682
+ if api_type == "chat-completions":
1683
+ request_to_make = MAP_TYPE_TO_OPENAI_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1684
+ return request_to_make(client, **function_kwargs)
1685
+ else:
1686
+ return client.responses.create(**function_kwargs)
1521
1687
 
1522
1688
 
1523
- async def aazure_openai_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1689
+ async def aazure_openai_request(
1690
+ prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict
1691
+ ):
1524
1692
  from openai import AsyncAzureOpenAI
1525
1693
 
1526
- client = AsyncAzureOpenAI(azure_endpoint=kwargs.pop("base_url", None))
1527
- request_to_make = AMAP_TYPE_TO_OPENAI_FUNCTION[
1528
- prompt_blueprint["prompt_template"]["type"]
1529
- ]
1530
- return await request_to_make(client, **kwargs)
1694
+ client = AsyncAzureOpenAI(azure_endpoint=client_kwargs.pop("base_url", None))
1695
+ api_type = prompt_blueprint["metadata"]["model"].get("api_type", "chat-completions")
1696
+
1697
+ if api_type == "chat-completions":
1698
+ request_to_make = AMAP_TYPE_TO_OPENAI_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1699
+ return await request_to_make(client, **function_kwargs)
1700
+ else:
1701
+ return await client.responses.create(**function_kwargs)
1531
1702
 
1532
1703
 
1533
1704
  def anthropic_chat_request(client, **kwargs):
@@ -1544,14 +1715,12 @@ MAP_TYPE_TO_ANTHROPIC_FUNCTION = {
1544
1715
  }
1545
1716
 
1546
1717
 
1547
- def anthropic_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1718
+ def anthropic_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1548
1719
  from anthropic import Anthropic
1549
1720
 
1550
- client = Anthropic(base_url=kwargs.pop("base_url", None))
1551
- request_to_make = MAP_TYPE_TO_ANTHROPIC_FUNCTION[
1552
- prompt_blueprint["prompt_template"]["type"]
1553
- ]
1554
- return request_to_make(client, **kwargs)
1721
+ client = Anthropic(**client_kwargs)
1722
+ request_to_make = MAP_TYPE_TO_ANTHROPIC_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1723
+ return request_to_make(client, **function_kwargs)
1555
1724
 
1556
1725
 
1557
1726
  async def aanthropic_chat_request(client, **kwargs):
@@ -1568,14 +1737,12 @@ AMAP_TYPE_TO_ANTHROPIC_FUNCTION = {
1568
1737
  }
1569
1738
 
1570
1739
 
1571
- async def aanthropic_request(prompt_blueprint: GetPromptTemplateResponse, **kwargs):
1740
+ async def aanthropic_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1572
1741
  from anthropic import AsyncAnthropic
1573
1742
 
1574
- client = AsyncAnthropic(base_url=kwargs.pop("base_url", None))
1575
- request_to_make = AMAP_TYPE_TO_ANTHROPIC_FUNCTION[
1576
- prompt_blueprint["prompt_template"]["type"]
1577
- ]
1578
- return await request_to_make(client, **kwargs)
1743
+ client = AsyncAnthropic(**client_kwargs)
1744
+ request_to_make = AMAP_TYPE_TO_ANTHROPIC_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1745
+ return await request_to_make(client, **function_kwargs)
1579
1746
 
1580
1747
 
1581
1748
  # do not remove! This is used in the langchain integration.
@@ -1583,214 +1750,336 @@ def get_api_key():
1583
1750
  # raise an error if the api key is not set
1584
1751
  api_key = os.environ.get("PROMPTLAYER_API_KEY")
1585
1752
  if not api_key:
1586
- raise Exception(
1587
- "Please set your PROMPTLAYER_API_KEY environment variable or set API KEY in code using 'promptlayer.api_key = <your_api_key>' "
1753
+ raise _exceptions.PromptLayerAuthenticationError(
1754
+ "Please set your PROMPTLAYER_API_KEY environment variable or set API KEY in code using 'promptlayer.api_key = <your_api_key>'",
1755
+ response=None,
1756
+ body=None,
1588
1757
  )
1589
1758
  return api_key
1590
1759
 
1591
1760
 
1592
- def util_log_request(api_key: str, **kwargs) -> Union[RequestLog, None]:
1761
+ @retry_on_api_error
1762
+ def util_log_request(api_key: str, base_url: str, throw_on_error: bool, **kwargs) -> Union[RequestLog, None]:
1593
1763
  try:
1594
1764
  response = requests.post(
1595
- f"{URL_API_PROMPTLAYER}/log-request",
1765
+ f"{base_url}/log-request",
1596
1766
  headers={"X-API-KEY": api_key},
1597
1767
  json=kwargs,
1598
1768
  )
1599
1769
  if response.status_code != 201:
1600
- warn_on_bad_response(
1601
- response,
1602
- "WARNING: While logging your request PromptLayer had the following error",
1603
- )
1604
- return None
1770
+ if throw_on_error:
1771
+ raise_on_bad_response(response, "PromptLayer had the following error while logging your request")
1772
+ else:
1773
+ warn_on_bad_response(
1774
+ response,
1775
+ "WARNING: While logging your request PromptLayer had the following error",
1776
+ )
1777
+ return None
1605
1778
  return response.json()
1606
1779
  except Exception as e:
1607
- print(
1608
- f"WARNING: While tracking your prompt PromptLayer had the following error: {e}",
1609
- file=sys.stderr,
1610
- )
1780
+ if throw_on_error:
1781
+ raise _exceptions.PromptLayerAPIError(
1782
+ f"While logging your request PromptLayer had the following error: {e}", response=None, body=None
1783
+ ) from e
1784
+ logger.warning(f"While tracking your prompt PromptLayer had the following error: {e}")
1611
1785
  return None
1612
1786
 
1613
1787
 
1614
- async def autil_log_request(api_key: str, **kwargs) -> Union[RequestLog, None]:
1788
+ @retry_on_api_error
1789
+ async def autil_log_request(api_key: str, base_url: str, throw_on_error: bool, **kwargs) -> Union[RequestLog, None]:
1615
1790
  try:
1616
- async with httpx.AsyncClient() as client:
1791
+ async with _make_httpx_client() as client:
1617
1792
  response = await client.post(
1618
- f"{URL_API_PROMPTLAYER}/log-request",
1793
+ f"{base_url}/log-request",
1619
1794
  headers={"X-API-KEY": api_key},
1620
1795
  json=kwargs,
1621
1796
  )
1622
1797
  if response.status_code != 201:
1623
- warn_on_bad_response(
1624
- response,
1625
- "WARNING: While logging your request PromptLayer had the following error",
1626
- )
1627
- return None
1798
+ if throw_on_error:
1799
+ raise_on_bad_response(response, "PromptLayer had the following error while logging your request")
1800
+ else:
1801
+ warn_on_bad_response(
1802
+ response,
1803
+ "WARNING: While logging your request PromptLayer had the following error",
1804
+ )
1805
+ return None
1628
1806
  return response.json()
1629
1807
  except Exception as e:
1630
- print(
1631
- f"WARNING: While tracking your prompt PromptLayer had the following error: {e}",
1632
- file=sys.stderr,
1633
- )
1808
+ if throw_on_error:
1809
+ raise _exceptions.PromptLayerAPIError(
1810
+ f"While logging your request PromptLayer had the following error: {e}", response=None, body=None
1811
+ ) from e
1812
+ logger.warning(f"While tracking your prompt PromptLayer had the following error: {e}")
1634
1813
  return None
1635
1814
 
1636
1815
 
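util_log_request forwards every remaining keyword argument verbatim as the /log-request JSON body and treats anything other than a 201 as failure. A sketch with a hypothetical payload; the fields the endpoint accepts are not visible in this diff:

from promptlayer.utils import util_log_request

log = util_log_request(
    "pl_xxx",                       # api_key, also sent as the X-API-KEY header
    "https://api.promptlayer.com",  # base_url
    False,                          # throw_on_error: warn and return None on failure
    provider="openai",              # everything from here down is the JSON body (hypothetical fields)
    model="gpt-4o-mini",
    input={"messages": [{"role": "user", "content": "hi"}]},
    output={"content": "hello"},
)
print(log)
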
1637
- def mistral_request(
1638
- prompt_blueprint: GetPromptTemplateResponse,
1639
- **kwargs,
1640
- ):
1816
+ def mistral_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1641
1817
  from mistralai import Mistral
1642
1818
 
1643
- client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
1644
- if "stream" in kwargs and kwargs["stream"]:
1645
- kwargs.pop("stream")
1646
- return client.chat.stream(**kwargs)
1647
- if "stream" in kwargs:
1648
- kwargs.pop("stream")
1649
- return client.chat.complete(**kwargs)
1819
+ client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"), client=_make_simple_httpx_client())
1820
+ if "stream" in function_kwargs and function_kwargs["stream"]:
1821
+ function_kwargs.pop("stream")
1822
+ return client.chat.stream(**function_kwargs)
1823
+ if "stream" in function_kwargs:
1824
+ function_kwargs.pop("stream")
1825
+ return client.chat.complete(**function_kwargs)
1650
1826
 
1651
1827
 
1652
1828
  async def amistral_request(
1653
1829
  prompt_blueprint: GetPromptTemplateResponse,
1654
- **kwargs,
1830
+ _: dict,
1831
+ function_kwargs: dict,
1655
1832
  ):
1656
1833
  from mistralai import Mistral
1657
1834
 
1658
- client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
1659
- if "stream" in kwargs and kwargs["stream"]:
1660
- return await client.chat.stream_async(**kwargs)
1661
- return await client.chat.complete_async(**kwargs)
1835
+ client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"), async_client=_make_httpx_client())
1836
+ if "stream" in function_kwargs and function_kwargs["stream"]:
1837
+ return await client.chat.stream_async(**function_kwargs)
1838
+ return await client.chat.complete_async(**function_kwargs)
1839
+
1840
+
1841
+ class _GoogleStreamWrapper:
1842
+ """Wrapper to keep Google client alive during streaming."""
1843
+
1844
+ def __init__(self, stream_generator, client):
1845
+ self._stream = stream_generator
1846
+ self._client = client # Keep client alive
1847
+
1848
+ def __iter__(self):
1849
+ return self._stream.__iter__()
1850
+
1851
+ def __next__(self):
1852
+ return next(self._stream)
1853
+
1854
+ def __aiter__(self):
1855
+ return self._stream.__aiter__()
1856
+
1857
+ async def __anext__(self):
1858
+ return await self._stream.__anext__()
1859
+
1860
+
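Without this wrapper, the genai.Client constructed inside google_request would become garbage as soon as the function returned, and its transport could be closed while the caller is still consuming the stream; holding the client on the wrapper pins it for the stream's lifetime. The same pattern in miniature, independent of the Google SDK:

class KeepAliveStream:
    """Pin an owner object for as long as a stream is being iterated."""

    def __init__(self, stream, owner):
        self._stream = stream
        self._owner = owner  # the reference alone is the point: it defers garbage collection

    def __iter__(self):
        return iter(self._stream)

for chunk in KeepAliveStream(iter(["a", "b"]), owner=object()):
    print(chunk)
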
1861
+ def google_chat_request(client, **kwargs):
1862
+ from google.genai.chats import Content
1863
+
1864
+ stream = kwargs.pop("stream", False)
1865
+ model = kwargs.get("model", "gemini-2.0-flash")
1866
+ history = [Content(**item) for item in kwargs.get("history", [])]
1867
+ generation_config = kwargs.get("generation_config", {})
1868
+ chat = client.chats.create(model=model, history=history, config=generation_config)
1869
+ last_message = history[-1].parts if history else ""
1870
+ if stream:
1871
+ stream_gen = chat.send_message_stream(message=last_message)
1872
+ return _GoogleStreamWrapper(stream_gen, client)
1873
+ return chat.send_message(message=last_message)
1874
+
1875
+
1876
+ def google_completions_request(client, **kwargs):
1877
+ config = kwargs.pop("generation_config", {})
1878
+ model = kwargs.get("model", "gemini-2.0-flash")
1879
+ contents = kwargs.get("contents", [])
1880
+ stream = kwargs.pop("stream", False)
1881
+ if stream:
1882
+ stream_gen = client.models.generate_content_stream(model=model, contents=contents, config=config)
1883
+ return _GoogleStreamWrapper(stream_gen, client)
1884
+ return client.models.generate_content(model=model, contents=contents, config=config)
1885
+
1886
+
1887
+ MAP_TYPE_TO_GOOGLE_FUNCTION = {
1888
+ "chat": google_chat_request,
1889
+ "completion": google_completions_request,
1890
+ }
1891
+
1662
1892
 
1893
+ def google_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1894
+ from google import genai
1663
1895
 
1664
- def mistral_stream_chat(results: list):
1665
- from openai.types.chat import (
1666
- ChatCompletion,
1667
- ChatCompletionMessage,
1668
- ChatCompletionMessageToolCall,
1896
+ if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI") == "true":
1897
+ client = genai.Client(
1898
+ vertexai=True,
1899
+ project=os.environ.get("GOOGLE_CLOUD_PROJECT"),
1900
+ location=os.environ.get("GOOGLE_CLOUD_LOCATION"),
1901
+ )
1902
+ else:
1903
+ client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY"))
1904
+ request_to_make = MAP_TYPE_TO_GOOGLE_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1905
+ return request_to_make(client, **function_kwargs)
1906
+
1907
+
1908
+ async def agoogle_chat_request(client, **kwargs):
1909
+ from google.genai.chats import Content
1910
+
1911
+ stream = kwargs.pop("stream", False)
1912
+ model = kwargs.get("model", "gemini-2.0-flash")
1913
+ history = [Content(**item) for item in kwargs.get("history", [])]
1914
+ generation_config = kwargs.get("generation_config", {})
1915
+ chat = client.aio.chats.create(model=model, history=history, config=generation_config)
1916
+ last_message = history[-1].parts[0] if history else ""
1917
+ if stream:
1918
+ stream_gen = await chat.send_message_stream(message=last_message)
1919
+ return _GoogleStreamWrapper(stream_gen, client)
1920
+ return await chat.send_message(message=last_message)
1921
+
1922
+
1923
+ async def agoogle_completions_request(client, **kwargs):
1924
+ config = kwargs.pop("generation_config", {})
1925
+ model = kwargs.get("model", "gemini-2.0-flash")
1926
+ contents = kwargs.get("contents", [])
1927
+ stream = kwargs.pop("stream", False)
1928
+ if stream:
1929
+ stream_gen = await client.aio.models.generate_content_stream(model=model, contents=contents, config=config)
1930
+ return _GoogleStreamWrapper(stream_gen, client)
1931
+ return await client.aio.models.generate_content(model=model, contents=contents, config=config)
1932
+
1933
+
1934
+ AMAP_TYPE_TO_GOOGLE_FUNCTION = {
1935
+ "chat": agoogle_chat_request,
1936
+ "completion": agoogle_completions_request,
1937
+ }
1938
+
1939
+
1940
+ async def agoogle_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1941
+ from google import genai
1942
+
1943
+ if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI") == "true":
1944
+ client = genai.Client(
1945
+ vertexai=True,
1946
+ project=os.environ.get("GOOGLE_CLOUD_PROJECT"),
1947
+ location=os.environ.get("GOOGLE_CLOUD_LOCATION"),
1948
+ )
1949
+ else:
1950
+ client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY"))
1951
+ request_to_make = AMAP_TYPE_TO_GOOGLE_FUNCTION[prompt_blueprint["prompt_template"]["type"]]
1952
+ return await request_to_make(client, **function_kwargs)
1953
+
1954
+
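Client construction here is driven entirely by environment variables: GOOGLE_GENAI_USE_VERTEXAI=true selects Vertex AI with GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION, otherwise GEMINI_API_KEY (or GOOGLE_API_KEY) is used. A sketch of the API-key path with hypothetical values; the blueprint is trimmed to the single key google_request reads:

import os

from promptlayer.utils import google_request

os.environ.setdefault("GEMINI_API_KEY", "hypothetical-key")

blueprint = {"prompt_template": {"type": "completion"}}
result = google_request(
    blueprint,
    client_kwargs={},  # accepted but currently unused by google_request
    function_kwargs={
        "model": "gemini-2.0-flash",
        "contents": ["Say hi"],
        "stream": False,
    },
)
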
1955
+ def vertexai_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1956
+ if "gemini" in prompt_blueprint["metadata"]["model"]["name"]:
1957
+ return google_request(
1958
+ prompt_blueprint=prompt_blueprint,
1959
+ client_kwargs=client_kwargs,
1960
+ function_kwargs=function_kwargs,
1961
+ )
1962
+
1963
+ if "claude" in prompt_blueprint["metadata"]["model"]["name"]:
1964
+ from anthropic import AnthropicVertex
1965
+
1966
+ client = AnthropicVertex(**client_kwargs)
1967
+ if prompt_blueprint["prompt_template"]["type"] == "chat":
1968
+ return anthropic_chat_request(client=client, **function_kwargs)
1969
+ raise NotImplementedError(
1970
+ f"Unsupported prompt template type {prompt_blueprint['prompt_template']['type']}' for Anthropic Vertex AI"
1971
+ )
1972
+
1973
+ raise NotImplementedError(
1974
+ f"Vertex AI request for model {prompt_blueprint['metadata']['model']['name']} is not implemented yet."
1669
1975
  )
1670
- from openai.types.chat.chat_completion import Choice
1671
- from openai.types.chat.chat_completion_message_tool_call import Function
1672
-
1673
- last_result = results[-1]
1674
- response = ChatCompletion(
1675
- id=last_result.data.id,
1676
- object="chat.completion",
1677
- choices=[
1678
- Choice(
1679
- finish_reason=last_result.data.choices[0].finish_reason or "stop",
1680
- index=0,
1681
- message=ChatCompletionMessage(role="assistant"),
1682
- )
1683
- ],
1684
- created=last_result.data.created,
1685
- model=last_result.data.model,
1976
+
1977
+
1978
+ async def avertexai_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
1979
+ if "gemini" in prompt_blueprint["metadata"]["model"]["name"]:
1980
+ return await agoogle_request(
1981
+ prompt_blueprint=prompt_blueprint,
1982
+ client_kwargs=client_kwargs,
1983
+ function_kwargs=function_kwargs,
1984
+ )
1985
+
1986
+ if "claude" in prompt_blueprint["metadata"]["model"]["name"]:
1987
+ from anthropic import AsyncAnthropicVertex
1988
+
1989
+ client = AsyncAnthropicVertex(**client_kwargs)
1990
+ if prompt_blueprint["prompt_template"]["type"] == "chat":
1991
+ return await aanthropic_chat_request(client=client, **function_kwargs)
1992
+ raise NotImplementedError(
1993
+ f"Unsupported prompt template type {prompt_blueprint['prompt_template']['type']}' for Anthropic Vertex AI"
1994
+ )
1995
+
1996
+ raise NotImplementedError(
1997
+ f"Vertex AI request for model {prompt_blueprint['metadata']['model']['name']} is not implemented yet."
1686
1998
  )
1687
1999
 
1688
- content = ""
1689
- tool_calls = None
1690
-
1691
- for result in results:
1692
- choices = result.data.choices
1693
- if len(choices) == 0:
1694
- continue
1695
-
1696
- delta = choices[0].delta
1697
- if delta.content is not None:
1698
- content = f"{content}{delta.content}"
1699
-
1700
- if delta.tool_calls:
1701
- tool_calls = tool_calls or []
1702
- for tool_call in delta.tool_calls:
1703
- if len(tool_calls) == 0 or tool_call.id:
1704
- tool_calls.append(
1705
- ChatCompletionMessageToolCall(
1706
- id=tool_call.id or "",
1707
- function=Function(
1708
- name=tool_call.function.name,
1709
- arguments=tool_call.function.arguments,
1710
- ),
1711
- type="function",
1712
- )
1713
- )
1714
- else:
1715
- last_tool_call = tool_calls[-1]
1716
- if tool_call.function.name:
1717
- last_tool_call.function.name = (
1718
- f"{last_tool_call.function.name}{tool_call.function.name}"
1719
- )
1720
- if tool_call.function.arguments:
1721
- last_tool_call.function.arguments = f"{last_tool_call.function.arguments}{tool_call.function.arguments}"
1722
-
1723
- response.choices[0].message.content = content
1724
- response.choices[0].message.tool_calls = tool_calls
1725
- response.usage = last_result.data.usage
1726
- return response
1727
-
1728
-
1729
- async def amistral_stream_chat(generator: AsyncIterable[Any]) -> Any:
1730
- from openai.types.chat import (
1731
- ChatCompletion,
1732
- ChatCompletionMessage,
1733
- ChatCompletionMessageToolCall,
2000
+
2001
+ def amazon_bedrock_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
2002
+ import boto3
2003
+
2004
+ bedrock_client = boto3.client(
2005
+ "bedrock-runtime",
2006
+ aws_access_key_id=function_kwargs.pop("aws_access_key", None),
2007
+ aws_secret_access_key=function_kwargs.pop("aws_secret_key", None),
2008
+ region_name=function_kwargs.pop("aws_region", "us-east-1"),
1734
2009
  )
1735
- from openai.types.chat.chat_completion import Choice
1736
- from openai.types.chat.chat_completion_message_tool_call import Function
1737
-
1738
- completion_chunks = []
1739
- response = ChatCompletion(
1740
- id="",
1741
- object="chat.completion",
1742
- choices=[
1743
- Choice(
1744
- finish_reason="stop",
1745
- index=0,
1746
- message=ChatCompletionMessage(role="assistant"),
1747
- )
1748
- ],
1749
- created=0,
1750
- model="",
2010
+
2011
+ stream = function_kwargs.pop("stream", False)
2012
+
2013
+ if stream:
2014
+ return bedrock_client.converse_stream(**function_kwargs)
2015
+ else:
2016
+ return bedrock_client.converse(**function_kwargs)
2017
+
2018
+
2019
+ async def aamazon_bedrock_request(
2020
+ prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict
2021
+ ):
2022
+ import aioboto3
2023
+
2024
+ aws_access_key = function_kwargs.pop("aws_access_key", None)
2025
+ aws_secret_key = function_kwargs.pop("aws_secret_key", None)
2026
+ aws_region = function_kwargs.pop("aws_region", "us-east-1")
2027
+
2028
+ session_kwargs = {}
2029
+ if aws_access_key:
2030
+ session_kwargs["aws_access_key_id"] = aws_access_key
2031
+ if aws_secret_key:
2032
+ session_kwargs["aws_secret_access_key"] = aws_secret_key
2033
+ if aws_region:
2034
+ session_kwargs["region_name"] = aws_region
2035
+
2036
+ stream = function_kwargs.pop("stream", False)
2037
+ session = aioboto3.Session()
2038
+
2039
+ async with session.client("bedrock-runtime", **session_kwargs) as client:
2040
+ if stream:
2041
+ return await client.converse_stream(**function_kwargs)
2042
+ else:
2043
+ return await client.converse(**function_kwargs)
2044
+
2045
+
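The Bedrock helpers pop AWS credentials out of function_kwargs rather than client_kwargs before forwarding the rest to converse/converse_stream; when the keys are absent, boto3 receives None and falls back to its standard credential chain. A sketch with a hypothetical Bedrock model id:

from promptlayer.utils import amazon_bedrock_request

blueprint = {"prompt_template": {"type": "chat"}}  # not consulted by this helper

response = amazon_bedrock_request(
    blueprint,
    client_kwargs={},
    function_kwargs={
        "aws_region": "us-east-1",
        "modelId": "anthropic.claude-3-haiku-20240307-v1:0",  # hypothetical model id
        "messages": [{"role": "user", "content": [{"text": "hi"}]}],
        "stream": False,
    },
)
print(response["output"]["message"]["content"][0]["text"])
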
2046
+ def anthropic_bedrock_request(prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict):
2047
+ from anthropic import AnthropicBedrock
2048
+
2049
+ client = AnthropicBedrock(
2050
+ aws_access_key=function_kwargs.pop("aws_access_key", None),
2051
+ aws_secret_key=function_kwargs.pop("aws_secret_key", None),
2052
+ aws_region=function_kwargs.pop("aws_region", None),
2053
+ aws_session_token=function_kwargs.pop("aws_session_token", None),
2054
+ base_url=function_kwargs.pop("base_url", None),
2055
+ **client_kwargs,
2056
+ )
2057
+ if prompt_blueprint["prompt_template"]["type"] == "chat":
2058
+ return anthropic_chat_request(client=client, **function_kwargs)
2059
+ elif prompt_blueprint["prompt_template"]["type"] == "completion":
2060
+ return anthropic_completions_request(client=client, **function_kwargs)
2061
+ raise NotImplementedError(
2062
+ f"Unsupported prompt template type {prompt_blueprint['prompt_template']['type']}' for Anthropic Bedrock"
2063
+ )
2064
+
2065
+
2066
+ async def aanthropic_bedrock_request(
2067
+ prompt_blueprint: GetPromptTemplateResponse, client_kwargs: dict, function_kwargs: dict
2068
+ ):
2069
+ from anthropic import AsyncAnthropicBedrock
2070
+
2071
+ client = AsyncAnthropicBedrock(
2072
+ aws_access_key=function_kwargs.pop("aws_access_key", None),
2073
+ aws_secret_key=function_kwargs.pop("aws_secret_key", None),
2074
+ aws_region=function_kwargs.pop("aws_region", None),
2075
+ aws_session_token=function_kwargs.pop("aws_session_token", None),
2076
+ base_url=function_kwargs.pop("base_url", None),
2077
+ **client_kwargs,
2078
+ )
2079
+ if prompt_blueprint["prompt_template"]["type"] == "chat":
2080
+ return await aanthropic_chat_request(client=client, **function_kwargs)
2081
+ elif prompt_blueprint["prompt_template"]["type"] == "completion":
2082
+ return await aanthropic_completions_request(client=client, **function_kwargs)
2083
+ raise NotImplementedError(
2084
+ f"Unsupported prompt template type {prompt_blueprint['prompt_template']['type']}' for Anthropic Bedrock"
1751
2085
  )
1752
- content = ""
1753
- tool_calls = None
1754
-
1755
- async for result in generator:
1756
- completion_chunks.append(result)
1757
- choices = result.data.choices
1758
- if len(choices) == 0:
1759
- continue
1760
- delta = choices[0].delta
1761
- if delta.content is not None:
1762
- content = f"{content}{delta.content}"
1763
-
1764
- if delta.tool_calls:
1765
- tool_calls = tool_calls or []
1766
- for tool_call in delta.tool_calls:
1767
- if len(tool_calls) == 0 or tool_call.id:
1768
- tool_calls.append(
1769
- ChatCompletionMessageToolCall(
1770
- id=tool_call.id or "",
1771
- function=Function(
1772
- name=tool_call.function.name,
1773
- arguments=tool_call.function.arguments,
1774
- ),
1775
- type="function",
1776
- )
1777
- )
1778
- else:
1779
- last_tool_call = tool_calls[-1]
1780
- if tool_call.function.name:
1781
- last_tool_call.function.name = (
1782
- f"{last_tool_call.function.name}{tool_call.function.name}"
1783
- )
1784
- if tool_call.function.arguments:
1785
- last_tool_call.function.arguments = f"{last_tool_call.function.arguments}{tool_call.function.arguments}"
1786
-
1787
- if completion_chunks:
1788
- last_result = completion_chunks[-1]
1789
- response.id = last_result.data.id
1790
- response.created = last_result.data.created
1791
- response.model = last_result.data.model
1792
- response.usage = last_result.data.usage
1793
-
1794
- response.choices[0].message.content = content
1795
- response.choices[0].message.tool_calls = tool_calls
1796
- return response