agentrun-sdk 0.1.2 (agentrun_sdk-0.1.2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of agentrun-sdk might be problematic.
- agentrun_operation_sdk/cli/__init__.py +1 -0
- agentrun_operation_sdk/cli/cli.py +19 -0
- agentrun_operation_sdk/cli/common.py +21 -0
- agentrun_operation_sdk/cli/runtime/__init__.py +1 -0
- agentrun_operation_sdk/cli/runtime/commands.py +203 -0
- agentrun_operation_sdk/client/client.py +75 -0
- agentrun_operation_sdk/operations/runtime/__init__.py +8 -0
- agentrun_operation_sdk/operations/runtime/configure.py +101 -0
- agentrun_operation_sdk/operations/runtime/launch.py +82 -0
- agentrun_operation_sdk/operations/runtime/models.py +31 -0
- agentrun_operation_sdk/services/runtime.py +152 -0
- agentrun_operation_sdk/utils/logging_config.py +72 -0
- agentrun_operation_sdk/utils/runtime/config.py +94 -0
- agentrun_operation_sdk/utils/runtime/container.py +280 -0
- agentrun_operation_sdk/utils/runtime/entrypoint.py +203 -0
- agentrun_operation_sdk/utils/runtime/schema.py +56 -0
- agentrun_sdk/__init__.py +7 -0
- agentrun_sdk/agent/__init__.py +25 -0
- agentrun_sdk/agent/agent.py +696 -0
- agentrun_sdk/agent/agent_result.py +46 -0
- agentrun_sdk/agent/conversation_manager/__init__.py +26 -0
- agentrun_sdk/agent/conversation_manager/conversation_manager.py +88 -0
- agentrun_sdk/agent/conversation_manager/null_conversation_manager.py +46 -0
- agentrun_sdk/agent/conversation_manager/sliding_window_conversation_manager.py +179 -0
- agentrun_sdk/agent/conversation_manager/summarizing_conversation_manager.py +252 -0
- agentrun_sdk/agent/state.py +97 -0
- agentrun_sdk/event_loop/__init__.py +9 -0
- agentrun_sdk/event_loop/event_loop.py +499 -0
- agentrun_sdk/event_loop/streaming.py +319 -0
- agentrun_sdk/experimental/__init__.py +4 -0
- agentrun_sdk/experimental/hooks/__init__.py +15 -0
- agentrun_sdk/experimental/hooks/events.py +123 -0
- agentrun_sdk/handlers/__init__.py +10 -0
- agentrun_sdk/handlers/callback_handler.py +70 -0
- agentrun_sdk/hooks/__init__.py +49 -0
- agentrun_sdk/hooks/events.py +80 -0
- agentrun_sdk/hooks/registry.py +247 -0
- agentrun_sdk/models/__init__.py +10 -0
- agentrun_sdk/models/anthropic.py +432 -0
- agentrun_sdk/models/bedrock.py +649 -0
- agentrun_sdk/models/litellm.py +225 -0
- agentrun_sdk/models/llamaapi.py +438 -0
- agentrun_sdk/models/mistral.py +539 -0
- agentrun_sdk/models/model.py +95 -0
- agentrun_sdk/models/ollama.py +357 -0
- agentrun_sdk/models/openai.py +436 -0
- agentrun_sdk/models/sagemaker.py +598 -0
- agentrun_sdk/models/writer.py +449 -0
- agentrun_sdk/multiagent/__init__.py +22 -0
- agentrun_sdk/multiagent/a2a/__init__.py +15 -0
- agentrun_sdk/multiagent/a2a/executor.py +148 -0
- agentrun_sdk/multiagent/a2a/server.py +252 -0
- agentrun_sdk/multiagent/base.py +92 -0
- agentrun_sdk/multiagent/graph.py +555 -0
- agentrun_sdk/multiagent/swarm.py +656 -0
- agentrun_sdk/py.typed +1 -0
- agentrun_sdk/session/__init__.py +18 -0
- agentrun_sdk/session/file_session_manager.py +216 -0
- agentrun_sdk/session/repository_session_manager.py +152 -0
- agentrun_sdk/session/s3_session_manager.py +272 -0
- agentrun_sdk/session/session_manager.py +73 -0
- agentrun_sdk/session/session_repository.py +51 -0
- agentrun_sdk/telemetry/__init__.py +21 -0
- agentrun_sdk/telemetry/config.py +194 -0
- agentrun_sdk/telemetry/metrics.py +476 -0
- agentrun_sdk/telemetry/metrics_constants.py +15 -0
- agentrun_sdk/telemetry/tracer.py +563 -0
- agentrun_sdk/tools/__init__.py +17 -0
- agentrun_sdk/tools/decorator.py +569 -0
- agentrun_sdk/tools/executor.py +137 -0
- agentrun_sdk/tools/loader.py +152 -0
- agentrun_sdk/tools/mcp/__init__.py +13 -0
- agentrun_sdk/tools/mcp/mcp_agent_tool.py +99 -0
- agentrun_sdk/tools/mcp/mcp_client.py +423 -0
- agentrun_sdk/tools/mcp/mcp_instrumentation.py +322 -0
- agentrun_sdk/tools/mcp/mcp_types.py +63 -0
- agentrun_sdk/tools/registry.py +607 -0
- agentrun_sdk/tools/structured_output.py +421 -0
- agentrun_sdk/tools/tools.py +217 -0
- agentrun_sdk/tools/watcher.py +136 -0
- agentrun_sdk/types/__init__.py +5 -0
- agentrun_sdk/types/collections.py +23 -0
- agentrun_sdk/types/content.py +188 -0
- agentrun_sdk/types/event_loop.py +48 -0
- agentrun_sdk/types/exceptions.py +81 -0
- agentrun_sdk/types/guardrails.py +254 -0
- agentrun_sdk/types/media.py +89 -0
- agentrun_sdk/types/session.py +152 -0
- agentrun_sdk/types/streaming.py +201 -0
- agentrun_sdk/types/tools.py +258 -0
- agentrun_sdk/types/traces.py +5 -0
- agentrun_sdk-0.1.2.dist-info/METADATA +51 -0
- agentrun_sdk-0.1.2.dist-info/RECORD +115 -0
- agentrun_sdk-0.1.2.dist-info/WHEEL +5 -0
- agentrun_sdk-0.1.2.dist-info/entry_points.txt +2 -0
- agentrun_sdk-0.1.2.dist-info/top_level.txt +3 -0
- agentrun_wrapper/__init__.py +11 -0
- agentrun_wrapper/_utils/__init__.py +6 -0
- agentrun_wrapper/_utils/endpoints.py +16 -0
- agentrun_wrapper/identity/__init__.py +5 -0
- agentrun_wrapper/identity/auth.py +211 -0
- agentrun_wrapper/memory/__init__.py +6 -0
- agentrun_wrapper/memory/client.py +1697 -0
- agentrun_wrapper/memory/constants.py +103 -0
- agentrun_wrapper/memory/controlplane.py +626 -0
- agentrun_wrapper/py.typed +1 -0
- agentrun_wrapper/runtime/__init__.py +13 -0
- agentrun_wrapper/runtime/app.py +473 -0
- agentrun_wrapper/runtime/context.py +34 -0
- agentrun_wrapper/runtime/models.py +25 -0
- agentrun_wrapper/services/__init__.py +1 -0
- agentrun_wrapper/services/identity.py +192 -0
- agentrun_wrapper/tools/__init__.py +6 -0
- agentrun_wrapper/tools/browser_client.py +325 -0
- agentrun_wrapper/tools/code_interpreter_client.py +186 -0
agentrun_sdk/models/litellm.py
@@ -0,0 +1,225 @@
+"""LiteLLM model provider.
+
+- Docs: https://docs.litellm.ai/
+"""
+
+import json
+import logging
+from typing import Any, AsyncGenerator, Optional, Type, TypedDict, TypeVar, Union, cast
+
+import litellm
+from litellm.utils import supports_response_schema
+from pydantic import BaseModel
+from typing_extensions import Unpack, override
+
+from ..types.content import ContentBlock, Messages
+from ..types.streaming import StreamEvent
+from ..types.tools import ToolSpec
+from .openai import OpenAIModel
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class LiteLLMModel(OpenAIModel):
+    """LiteLLM model provider implementation."""
+
+    class LiteLLMConfig(TypedDict, total=False):
+        """Configuration options for LiteLLM models.
+
+        Attributes:
+            model_id: Model ID (e.g., "openai/gpt-4o", "anthropic/claude-3-sonnet").
+                For a complete list of supported models, see https://docs.litellm.ai/docs/providers.
+            params: Model parameters (e.g., max_tokens).
+                For a complete list of supported parameters, see
+                https://docs.litellm.ai/docs/completion/input#input-params-1.
+        """
+
+        model_id: str
+        params: Optional[dict[str, Any]]
+
+    def __init__(self, client_args: Optional[dict[str, Any]] = None, **model_config: Unpack[LiteLLMConfig]) -> None:
+        """Initialize provider instance.
+
+        Args:
+            client_args: Arguments for the LiteLLM client.
+                For a complete list of supported arguments, see
+                https://github.com/BerriAI/litellm/blob/main/litellm/main.py.
+            **model_config: Configuration options for the LiteLLM model.
+        """
+        self.client_args = client_args or {}
+        self.config = dict(model_config)
+
+        logger.debug("config=<%s> | initializing", self.config)
+
+    @override
+    def update_config(self, **model_config: Unpack[LiteLLMConfig]) -> None:  # type: ignore[override]
+        """Update the LiteLLM model configuration with the provided arguments.
+
+        Args:
+            **model_config: Configuration overrides.
+        """
+        self.config.update(model_config)
+
+    @override
+    def get_config(self) -> LiteLLMConfig:
+        """Get the LiteLLM model configuration.
+
+        Returns:
+            The LiteLLM model configuration.
+        """
+        return cast(LiteLLMModel.LiteLLMConfig, self.config)
+
+    @override
+    @classmethod
+    def format_request_message_content(cls, content: ContentBlock) -> dict[str, Any]:
+        """Format a LiteLLM content block.
+
+        Args:
+            content: Message content.
+
+        Returns:
+            LiteLLM formatted content block.
+
+        Raises:
+            TypeError: If the content block type cannot be converted to a LiteLLM-compatible format.
+        """
+        if "reasoningContent" in content:
+            return {
+                "signature": content["reasoningContent"]["reasoningText"]["signature"],
+                "thinking": content["reasoningContent"]["reasoningText"]["text"],
+                "type": "thinking",
+            }
+
+        if "video" in content:
+            return {
+                "type": "video_url",
+                "video_url": {
+                    "detail": "auto",
+                    "url": content["video"]["source"]["bytes"],
+                },
+            }
+
+        return super().format_request_message_content(content)
+
+    @override
+    async def stream(
+        self,
+        messages: Messages,
+        tool_specs: Optional[list[ToolSpec]] = None,
+        system_prompt: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncGenerator[StreamEvent, None]:
+        """Stream conversation with the LiteLLM model.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            tool_specs: List of tool specifications to make available to the model.
+            system_prompt: System prompt to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Yields:
+            Formatted message chunks from the model.
+        """
+        logger.debug("formatting request")
+        request = self.format_request(messages, tool_specs, system_prompt)
+        logger.debug("request=<%s>", request)
+
+        logger.debug("invoking model")
+        response = await litellm.acompletion(**self.client_args, **request)
+
+        logger.debug("got response from model")
+        yield self.format_chunk({"chunk_type": "message_start"})
+        yield self.format_chunk({"chunk_type": "content_start", "data_type": "text"})
+
+        tool_calls: dict[int, list[Any]] = {}
+
+        async for event in response:
+            # Defensive: skip events with empty or missing choices
+            if not getattr(event, "choices", None):
+                continue
+            choice = event.choices[0]
+
+            if choice.delta.content:
+                yield self.format_chunk(
+                    {"chunk_type": "content_delta", "data_type": "text", "data": choice.delta.content}
+                )
+
+            if hasattr(choice.delta, "reasoning_content") and choice.delta.reasoning_content:
+                yield self.format_chunk(
+                    {
+                        "chunk_type": "content_delta",
+                        "data_type": "reasoning_content",
+                        "data": choice.delta.reasoning_content,
+                    }
+                )
+
+            for tool_call in choice.delta.tool_calls or []:
+                tool_calls.setdefault(tool_call.index, []).append(tool_call)
+
+            if choice.finish_reason:
+                break
+
+        yield self.format_chunk({"chunk_type": "content_stop", "data_type": "text"})
+
+        for tool_deltas in tool_calls.values():
+            yield self.format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": tool_deltas[0]})
+
+            for tool_delta in tool_deltas:
+                yield self.format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": tool_delta})
+
+            yield self.format_chunk({"chunk_type": "content_stop", "data_type": "tool"})
+
+        yield self.format_chunk({"chunk_type": "message_stop", "data": choice.finish_reason})
+
+        # Skip remaining events as we don't have use for anything except the final usage payload
+        async for event in response:
+            _ = event
+
+        if event.usage:
+            yield self.format_chunk({"chunk_type": "metadata", "data": event.usage})
+
+        logger.debug("finished streaming response from model")
+
+    @override
+    async def structured_output(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None, **kwargs: Any
+    ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
+        """Get structured output from the model.
+
+        Args:
+            output_model: The output model to use for the agent.
+            prompt: The prompt messages to use for the agent.
+            system_prompt: System prompt to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Yields:
+            Model events with the last being the structured output.
+        """
+        response = await litellm.acompletion(
+            **self.client_args,
+            model=self.get_config()["model_id"],
+            messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
+            response_format=output_model,
+        )
+
+        if not supports_response_schema(self.get_config()["model_id"]):
+            raise ValueError("Model does not support response_format")
+        if len(response.choices) > 1:
+            raise ValueError("Multiple choices found in the response.")
+
+        # Find the first choice with tool_calls
+        for choice in response.choices:
+            if choice.finish_reason == "tool_calls":
+                try:
+                    # Parse the tool call content as JSON
+                    tool_call_data = json.loads(choice.message.content)
+                    # Instantiate the output model with the parsed data
+                    yield {"output": output_model(**tool_call_data)}
+                    return
+                except (json.JSONDecodeError, TypeError, ValueError) as e:
+                    raise ValueError(f"Failed to parse or load content into model: {e}") from e
+
+        # If no tool_calls found, raise an error
+        raise ValueError("No tool_calls found in response")
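For orientation, below is a minimal usage sketch of the LiteLLMModel provider added in this release. It is not part of the package diff: the import path (agentrun_sdk.models.litellm), the chosen model_id, the params values, and the content-block message shape are assumptions inferred from the file listing and the type imports above, and running it requires valid provider credentials (e.g. OPENAI_API_KEY) in the environment.

# Hypothetical usage sketch (not part of the package diff above).
# Assumes the class is importable as below and that LiteLLM can reach the
# underlying provider, e.g. via OPENAI_API_KEY in the environment.
import asyncio

from agentrun_sdk.models.litellm import LiteLLMModel


async def main() -> None:
    model = LiteLLMModel(
        client_args={},  # forwarded verbatim into litellm.acompletion(**client_args, **request)
        model_id="openai/gpt-4o",  # any LiteLLM-supported provider/model string
        params={"max_tokens": 256},
    )

    # Messages follow the ContentBlock shape imported by the module above (assumed here).
    messages = [{"role": "user", "content": [{"text": "Say hello in one sentence."}]}]

    # stream() yields standardized chunks: messageStart, contentBlockDelta, messageStop,
    # and a final metadata chunk when usage data is present.
    async for chunk in model.stream(messages):
        print(chunk)


asyncio.run(main())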
agentrun_sdk/models/llamaapi.py
@@ -0,0 +1,438 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+"""Llama API model provider.
+
+- Docs: https://llama.developer.meta.com/
+"""
+
+import base64
+import json
+import logging
+import mimetypes
+from typing import Any, AsyncGenerator, Optional, Type, TypeVar, Union, cast
+
+import llama_api_client
+from llama_api_client import LlamaAPIClient
+from pydantic import BaseModel
+from typing_extensions import TypedDict, Unpack, override
+
+from ..types.content import ContentBlock, Messages
+from ..types.exceptions import ModelThrottledException
+from ..types.streaming import StreamEvent, Usage
+from ..types.tools import ToolResult, ToolSpec, ToolUse
+from .model import Model
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class LlamaAPIModel(Model):
+    """Llama API model provider implementation."""
+
+    class LlamaConfig(TypedDict, total=False):
+        """Configuration options for Llama API models.
+
+        Attributes:
+            model_id: Model ID (e.g., "Llama-4-Maverick-17B-128E-Instruct-FP8").
+            repetition_penalty: Repetition penalty.
+            temperature: Temperature.
+            top_p: Top-p.
+            max_completion_tokens: Maximum completion tokens.
+            top_k: Top-k.
+        """
+
+        model_id: str
+        repetition_penalty: Optional[float]
+        temperature: Optional[float]
+        top_p: Optional[float]
+        max_completion_tokens: Optional[int]
+        top_k: Optional[int]
+
+    def __init__(
+        self,
+        *,
+        client_args: Optional[dict[str, Any]] = None,
+        **model_config: Unpack[LlamaConfig],
+    ) -> None:
+        """Initialize provider instance.
+
+        Args:
+            client_args: Arguments for the Llama API client.
+            **model_config: Configuration options for the Llama API model.
+        """
+        self.config = LlamaAPIModel.LlamaConfig(**model_config)
+        logger.debug("config=<%s> | initializing", self.config)
+
+        if not client_args:
+            self.client = LlamaAPIClient()
+        else:
+            self.client = LlamaAPIClient(**client_args)
+
+    @override
+    def update_config(self, **model_config: Unpack[LlamaConfig]) -> None:  # type: ignore
+        """Update the Llama API Model configuration with the provided arguments.
+
+        Args:
+            **model_config: Configuration overrides.
+        """
+        self.config.update(model_config)
+
+    @override
+    def get_config(self) -> LlamaConfig:
+        """Get the Llama API model configuration.
+
+        Returns:
+            The Llama API model configuration.
+        """
+        return self.config
+
+    def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]:
+        """Format a LlamaAPI content block.
+
+        - NOTE: "reasoningContent" and "video" are not supported currently.
+
+        Args:
+            content: Message content.
+
+        Returns:
+            LllamaAPI formatted content block.
+
+        Raises:
+            TypeError: If the content block type cannot be converted to a LlamaAPI-compatible format.
+        """
+        if "image" in content:
+            mime_type = mimetypes.types_map.get(f".{content['image']['format']}", "application/octet-stream")
+            image_data = base64.b64encode(content["image"]["source"]["bytes"]).decode("utf-8")
+
+            return {
+                "image_url": {
+                    "url": f"data:{mime_type};base64,{image_data}",
+                },
+                "type": "image_url",
+            }
+
+        if "text" in content:
+            return {"text": content["text"], "type": "text"}
+
+        raise TypeError(f"content_type=<{next(iter(content))}> | unsupported type")
+
+    def _format_request_message_tool_call(self, tool_use: ToolUse) -> dict[str, Any]:
+        """Format a Llama API tool call.
+
+        Args:
+            tool_use: Tool use requested by the model.
+
+        Returns:
+            Llama API formatted tool call.
+        """
+        return {
+            "function": {
+                "arguments": json.dumps(tool_use["input"]),
+                "name": tool_use["name"],
+            },
+            "id": tool_use["toolUseId"],
+        }
+
+    def _format_request_tool_message(self, tool_result: ToolResult) -> dict[str, Any]:
+        """Format a Llama API tool message.
+
+        Args:
+            tool_result: Tool result collected from a tool execution.
+
+        Returns:
+            Llama API formatted tool message.
+        """
+        contents = cast(
+            list[ContentBlock],
+            [
+                {"text": json.dumps(content["json"])} if "json" in content else content
+                for content in tool_result["content"]
+            ],
+        )
+
+        return {
+            "role": "tool",
+            "tool_call_id": tool_result["toolUseId"],
+            "content": [self._format_request_message_content(content) for content in contents],
+        }
+
+    def _format_request_messages(self, messages: Messages, system_prompt: Optional[str] = None) -> list[dict[str, Any]]:
+        """Format a LlamaAPI compatible messages array.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            system_prompt: System prompt to provide context to the model.
+
+        Returns:
+            An LlamaAPI compatible messages array.
+        """
+        formatted_messages: list[dict[str, Any]]
+        formatted_messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
+
+        for message in messages:
+            contents = message["content"]
+
+            formatted_contents: list[dict[str, Any]] | dict[str, Any] | str = ""
+            formatted_contents = [
+                self._format_request_message_content(content)
+                for content in contents
+                if not any(block_type in content for block_type in ["toolResult", "toolUse"])
+            ]
+            formatted_tool_calls = [
+                self._format_request_message_tool_call(content["toolUse"])
+                for content in contents
+                if "toolUse" in content
+            ]
+            formatted_tool_messages = [
+                self._format_request_tool_message(content["toolResult"])
+                for content in contents
+                if "toolResult" in content
+            ]
+
+            if message["role"] == "assistant":
+                formatted_contents = formatted_contents[0] if formatted_contents else ""
+
+            formatted_message = {
+                "role": message["role"],
+                "content": formatted_contents if len(formatted_contents) > 0 else "",
+                **({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
+            }
+            formatted_messages.append(formatted_message)
+            formatted_messages.extend(formatted_tool_messages)
+
+        return [message for message in formatted_messages if message["content"] or "tool_calls" in message]
+
+    def format_request(
+        self, messages: Messages, tool_specs: Optional[list[ToolSpec]] = None, system_prompt: Optional[str] = None
+    ) -> dict[str, Any]:
+        """Format a Llama API chat streaming request.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            tool_specs: List of tool specifications to make available to the model.
+            system_prompt: System prompt to provide context to the model.
+
+        Returns:
+            An Llama API chat streaming request.
+
+        Raises:
+            TypeError: If a message contains a content block type that cannot be converted to a LlamaAPI-compatible
+                format.
+        """
+        request = {
+            "messages": self._format_request_messages(messages, system_prompt),
+            "model": self.config["model_id"],
+            "stream": True,
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": tool_spec["name"],
+                        "description": tool_spec["description"],
+                        "parameters": tool_spec["inputSchema"]["json"],
+                    },
+                }
+                for tool_spec in tool_specs or []
+            ],
+        }
+        if "temperature" in self.config:
+            request["temperature"] = self.config["temperature"]
+        if "top_p" in self.config:
+            request["top_p"] = self.config["top_p"]
+        if "repetition_penalty" in self.config:
+            request["repetition_penalty"] = self.config["repetition_penalty"]
+        if "max_completion_tokens" in self.config:
+            request["max_completion_tokens"] = self.config["max_completion_tokens"]
+        if "top_k" in self.config:
+            request["top_k"] = self.config["top_k"]
+
+        return request
+
+    def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
+        """Format the Llama API model response events into standardized message chunks.
+
+        Args:
+            event: A response event from the model.
+
+        Returns:
+            The formatted chunk.
+        """
+        match event["chunk_type"]:
+            case "message_start":
+                return {"messageStart": {"role": "assistant"}}
+
+            case "content_start":
+                if event["data_type"] == "text":
+                    return {"contentBlockStart": {"start": {}}}
+
+                return {
+                    "contentBlockStart": {
+                        "start": {
+                            "toolUse": {
+                                "name": event["data"].function.name,
+                                "toolUseId": event["data"].id,
+                            }
+                        }
+                    }
+                }
+
+            case "content_delta":
+                if event["data_type"] == "text":
+                    return {"contentBlockDelta": {"delta": {"text": event["data"]}}}
+
+                return {"contentBlockDelta": {"delta": {"toolUse": {"input": event["data"].function.arguments}}}}
+
+            case "content_stop":
+                return {"contentBlockStop": {}}
+
+            case "message_stop":
+                match event["data"]:
+                    case "tool_calls":
+                        return {"messageStop": {"stopReason": "tool_use"}}
+                    case "length":
+                        return {"messageStop": {"stopReason": "max_tokens"}}
+                    case _:
+                        return {"messageStop": {"stopReason": "end_turn"}}
+
+            case "metadata":
+                usage = {}
+                for metrics in event["data"]:
+                    if metrics.metric == "num_prompt_tokens":
+                        usage["inputTokens"] = metrics.value
+                    elif metrics.metric == "num_completion_tokens":
+                        usage["outputTokens"] = metrics.value
+                    elif metrics.metric == "num_total_tokens":
+                        usage["totalTokens"] = metrics.value
+
+                usage_type = Usage(
+                    inputTokens=usage["inputTokens"],
+                    outputTokens=usage["outputTokens"],
+                    totalTokens=usage["totalTokens"],
+                )
+                return {
+                    "metadata": {
+                        "usage": usage_type,
+                        "metrics": {
+                            "latencyMs": 0,  # TODO
+                        },
+                    },
+                }
+
+            case _:
+                raise RuntimeError(f"chunk_type=<{event['chunk_type']} | unknown type")
+
+    @override
+    async def stream(
+        self,
+        messages: Messages,
+        tool_specs: Optional[list[ToolSpec]] = None,
+        system_prompt: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncGenerator[StreamEvent, None]:
+        """Stream conversation with the LlamaAPI model.
+
+        Args:
+            messages: List of message objects to be processed by the model.
+            tool_specs: List of tool specifications to make available to the model.
+            system_prompt: System prompt to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Yields:
+            Formatted message chunks from the model.
+
+        Raises:
+            ModelThrottledException: When the model service is throttling requests from the client.
+        """
+        logger.debug("formatting request")
+        request = self.format_request(messages, tool_specs, system_prompt)
+        logger.debug("request=<%s>", request)
+
+        logger.debug("invoking model")
+        try:
+            response = self.client.chat.completions.create(**request)
+        except llama_api_client.RateLimitError as e:
+            raise ModelThrottledException(str(e)) from e
+
+        logger.debug("got response from model")
+        yield self.format_chunk({"chunk_type": "message_start"})
+
+        stop_reason = None
+        tool_calls: dict[Any, list[Any]] = {}
+        curr_tool_call_id = None
+
+        metrics_event = None
+        for chunk in response:
+            if chunk.event.event_type == "start":
+                yield self.format_chunk({"chunk_type": "content_start", "data_type": "text"})
+            elif chunk.event.event_type in ["progress", "complete"] and chunk.event.delta.type == "text":
+                yield self.format_chunk(
+                    {"chunk_type": "content_delta", "data_type": "text", "data": chunk.event.delta.text}
+                )
+            else:
+                if chunk.event.delta.type == "tool_call":
+                    if chunk.event.delta.id:
+                        curr_tool_call_id = chunk.event.delta.id
+
+                    if curr_tool_call_id not in tool_calls:
+                        tool_calls[curr_tool_call_id] = []
+                    tool_calls[curr_tool_call_id].append(chunk.event.delta)
+                elif chunk.event.event_type == "metrics":
+                    metrics_event = chunk.event.metrics
+                else:
+                    yield self.format_chunk(chunk)
+
+            if stop_reason is None:
+                stop_reason = chunk.event.stop_reason
+
+            # stopped generation
+            if stop_reason:
+                yield self.format_chunk({"chunk_type": "content_stop", "data_type": "text"})
+
+        for tool_deltas in tool_calls.values():
+            tool_start, tool_deltas = tool_deltas[0], tool_deltas[1:]
+            yield self.format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": tool_start})
+
+            for tool_delta in tool_deltas:
+                yield self.format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": tool_delta})
+
+            yield self.format_chunk({"chunk_type": "content_stop", "data_type": "tool"})
+
+        yield self.format_chunk({"chunk_type": "message_stop", "data": stop_reason})
+
+        # we may have a metrics event here
+        if metrics_event:
+            yield self.format_chunk({"chunk_type": "metadata", "data": metrics_event})
+
+        logger.debug("finished streaming response from model")
+
+    @override
+    def structured_output(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None, **kwargs: Any
+    ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
+        """Get structured output from the model.
+
+        Args:
+            output_model: The output model to use for the agent.
+            prompt: The prompt messages to use for the agent.
+            system_prompt: System prompt to provide context to the model.
+            **kwargs: Additional keyword arguments for future extensibility.
+
+        Yields:
+            Model events with the last being the structured output.
+
+        Raises:
+            NotImplementedError: Structured output is not currently supported for LlamaAPI models.
+        """
+        # response_format: ResponseFormat = {
+        #     "type": "json_schema",
+        #     "json_schema": {
+        #         "name": output_model.__name__,
+        #         "schema": output_model.model_json_schema(),
+        #     },
+        # }
+        # response = self.client.chat.completions.create(
+        #     model=self.config["model_id"],
+        #     messages=self.format_request(prompt)["messages"],
+        #     response_format=response_format,
+        # )
+        raise NotImplementedError("Strands sdk-python does not implement this in the Llama API Preview.")
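A comparable sketch for the LlamaAPIModel provider, again hedged: the import path (agentrun_sdk.models.llamaapi), the example model_id taken from the config docstring, and the way LlamaAPIClient resolves credentials from the environment are assumptions, not facts established by this diff.

# Hypothetical usage sketch (not part of the package diff above).
# Assumes the class is importable as below and that LlamaAPIClient resolves
# its credentials from the environment (e.g. LLAMA_API_KEY).
import asyncio

from agentrun_sdk.models.llamaapi import LlamaAPIModel


async def main() -> None:
    model = LlamaAPIModel(
        client_args=None,  # falls back to LlamaAPIClient() with default settings
        model_id="Llama-4-Maverick-17B-128E-Instruct-FP8",  # example ID from the config docstring
        max_completion_tokens=256,
        temperature=0.2,
    )

    messages = [{"role": "user", "content": [{"text": "Explain what a tool call is."}]}]

    # stream() is an async generator even though the underlying client call is synchronous;
    # rate limits surface as ModelThrottledException.
    async for chunk in model.stream(messages):
        print(chunk)


asyncio.run(main())

Note that structured_output() for this provider raises NotImplementedError, as shown in the diff above, so only the streaming path is exercised here.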