PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/distribution/utils/exec.py DELETED Viewed

@@ -1,105 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import errno
-import os
-import pty
-import select
-import signal
-import subprocess
-import sys
-import termios
-from termcolor import cprint
-# run a command in a pseudo-terminal, with interrupt handling,
-# useful when you want to run interactive things
-def run_with_pty(command):
-    master, slave = pty.openpty()
-    old_settings = termios.tcgetattr(sys.stdin)
-    original_sigint = signal.getsignal(signal.SIGINT)
-    ctrl_c_pressed = False
-    def sigint_handler(signum, frame):
-        nonlocal ctrl_c_pressed
-        ctrl_c_pressed = True
-        cprint("\nCtrl-C detected. Aborting...", "white", attrs=["bold"])
-    try:
-        # Set up the signal handler
-        signal.signal(signal.SIGINT, sigint_handler)
-        new_settings = termios.tcgetattr(sys.stdin)
-        new_settings[3] = new_settings[3] & ~termios.ECHO  # Disable echo
-        new_settings[3] = new_settings[3] & ~termios.ICANON  # Disable canonical mode
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)
-        process = subprocess.Popen(
-            command,
-            stdin=slave,
-            stdout=slave,
-            stderr=slave,
-            universal_newlines=True,
-            preexec_fn=os.setsid,
-        )
-        # Close the slave file descriptor as it's now owned by the subprocess
-        os.close(slave)
-        def handle_io():
-            while not ctrl_c_pressed:
-                try:
-                    rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1)
-                    if sys.stdin in rlist:
-                        data = os.read(sys.stdin.fileno(), 1024)
-                        if not data:
-                            break
-                        os.write(master, data)
-                    if master in rlist:
-                        data = os.read(master, 1024)
-                        if not data:
-                            break
-                        sys.stdout.buffer.write(data)
-                        sys.stdout.flush()
-                except KeyboardInterrupt:
-                    # This will be raised when Ctrl+C is pressed
-                    break
-                if process.poll() is not None:
-                    break
-        handle_io()
-    except (EOFError, KeyboardInterrupt):
-        pass
-    except OSError as e:
-        if e.errno != errno.EIO:
-            raise
-    finally:
-        # Clean up
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
-        signal.signal(signal.SIGINT, original_sigint)
-        os.close(master)
-        if process.poll() is None:
-            process.terminate()
-            process.wait()
-    return process.returncode
-def run_command(command):
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    output, error = process.communicate()
-    if process.returncode != 0:
-        print(f"Error: {error.decode('utf-8')}")
-        sys.exit(1)
-    return output.decode("utf-8")

llama_stack/providers/adapters/agents/sample/sample.py DELETED Viewed

@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from .config import SampleConfig
-from llama_stack.apis.agents import *  # noqa: F403
-class SampleAgentsImpl(Agents):
-    def __init__(self, config: SampleConfig):
-        self.config = config
-    async def initialize(self):
-        pass

llama_stack/providers/adapters/inference/bedrock/bedrock.py DELETED Viewed

@@ -1,451 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from typing import *  # noqa: F403
-import boto3
-from botocore.client import BaseClient
-from botocore.config import Config
-from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.providers.adapters.inference.bedrock.config import BedrockConfig
-BEDROCK_SUPPORTED_MODELS = {
-    "Llama3.1-8B-Instruct": "meta.llama3-1-8b-instruct-v1:0",
-    "Llama3.1-70B-Instruct": "meta.llama3-1-70b-instruct-v1:0",
-    "Llama3.1-405B-Instruct": "meta.llama3-1-405b-instruct-v1:0",
-}
-# NOTE: this is not quite tested after the recent refactors
-class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
-    def __init__(self, config: BedrockConfig) -> None:
-        ModelRegistryHelper.__init__(
-            self, stack_to_provider_models_map=BEDROCK_SUPPORTED_MODELS
-        )
-        self._config = config
-        self._client = _create_bedrock_client(config)
-        self.formatter = ChatFormat(Tokenizer.get_instance())
-    @property
-    def client(self) -> BaseClient:
-        return self._client
-    async def initialize(self) -> None:
-        pass
-    async def shutdown(self) -> None:
-        self.client.close()
-    def completion(
-        self,
-        model: str,
-        content: InterleavedTextMedia,
-        sampling_params: Optional[SamplingParams] = SamplingParams(),
-        stream: Optional[bool] = False,
-        logprobs: Optional[LogProbConfig] = None,
-    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
-        raise NotImplementedError()
-    @staticmethod
-    def _bedrock_stop_reason_to_stop_reason(bedrock_stop_reason: str) -> StopReason:
-        if bedrock_stop_reason == "max_tokens":
-            return StopReason.out_of_tokens
-        return StopReason.end_of_turn
-    @staticmethod
-    def _builtin_tool_name_to_enum(tool_name_str: str) -> Union[BuiltinTool, str]:
-        for builtin_tool in BuiltinTool:
-            if builtin_tool.value == tool_name_str:
-                return builtin_tool
-        else:
-            return tool_name_str
-    @staticmethod
-    def _bedrock_message_to_message(converse_api_res: Dict) -> Message:
-        stop_reason = BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason(
-            converse_api_res["stopReason"]
-        )
-        bedrock_message = converse_api_res["output"]["message"]
-        role = bedrock_message["role"]
-        contents = bedrock_message["content"]
-        tool_calls = []
-        text_content = []
-        for content in contents:
-            if "toolUse" in content:
-                tool_use = content["toolUse"]
-                tool_calls.append(
-                    ToolCall(
-                        tool_name=BedrockInferenceAdapter._builtin_tool_name_to_enum(
-                            tool_use["name"]
-                        ),
-                        arguments=tool_use["input"] if "input" in tool_use else None,
-                        call_id=tool_use["toolUseId"],
-                    )
-                )
-            elif "text" in content:
-                text_content.append(content["text"])
-        return CompletionMessage(
-            role=role,
-            content=text_content,
-            stop_reason=stop_reason,
-            tool_calls=tool_calls,
-        )
-    @staticmethod
-    def _messages_to_bedrock_messages(
-        messages: List[Message],
-    ) -> Tuple[List[Dict], Optional[List[Dict]]]:
-        bedrock_messages = []
-        system_bedrock_messages = []
-        user_contents = []
-        assistant_contents = None
-        for message in messages:
-            role = message.role
-            content_list = (
-                message.content
-                if isinstance(message.content, list)
-                else [message.content]
-            )
-            if role == "ipython" or role == "user":
-                if not user_contents:
-                    user_contents = []
-                if role == "ipython":
-                    user_contents.extend(
-                        [
-                            {
-                                "toolResult": {
-                                    "toolUseId": message.call_id,
-                                    "content": [
-                                        {"text": content} for content in content_list
-                                    ],
-                                }
-                            }
-                        ]
-                    )
-                else:
-                    user_contents.extend(
-                        [{"text": content} for content in content_list]
-                    )
-                if assistant_contents:
-                    bedrock_messages.append(
-                        {"role": "assistant", "content": assistant_contents}
-                    )
-                    assistant_contents = None
-            elif role == "system":
-                system_bedrock_messages.extend(
-                    [{"text": content} for content in content_list]
-                )
-            elif role == "assistant":
-                if not assistant_contents:
-                    assistant_contents = []
-                assistant_contents.extend(
-                    [
-                        {
-                            "text": content,
-                        }
-                        for content in content_list
-                    ]
-                    + [
-                        {
-                            "toolUse": {
-                                "input": tool_call.arguments,
-                                "name": (
-                                    tool_call.tool_name
-                                    if isinstance(tool_call.tool_name, str)
-                                    else tool_call.tool_name.value
-                                ),
-                                "toolUseId": tool_call.call_id,
-                            }
-                        }
-                        for tool_call in message.tool_calls
-                    ]
-                )
-                if user_contents:
-                    bedrock_messages.append({"role": "user", "content": user_contents})
-                    user_contents = None
-            else:
-                # Unknown role
-                pass
-        if user_contents:
-            bedrock_messages.append({"role": "user", "content": user_contents})
-        if assistant_contents:
-            bedrock_messages.append(
-                {"role": "assistant", "content": assistant_contents}
-            )
-        if system_bedrock_messages:
-            return bedrock_messages, system_bedrock_messages
-        return bedrock_messages, None
-    @staticmethod
-    def get_bedrock_inference_config(sampling_params: Optional[SamplingParams]) -> Dict:
-        inference_config = {}
-        if sampling_params:
-            param_mapping = {
-                "max_tokens": "maxTokens",
-                "temperature": "temperature",
-                "top_p": "topP",
-            }
-            for k, v in param_mapping.items():
-                if getattr(sampling_params, k):
-                    inference_config[v] = getattr(sampling_params, k)
-        return inference_config
-    @staticmethod
-    def _tool_parameters_to_input_schema(
-        tool_parameters: Optional[Dict[str, ToolParamDefinition]],
-    ) -> Dict:
-        input_schema = {"type": "object"}
-        if not tool_parameters:
-            return input_schema
-        json_properties = {}
-        required = []
-        for name, param in tool_parameters.items():
-            json_property = {
-                "type": param.param_type,
-            }
-            if param.description:
-                json_property["description"] = param.description
-            if param.required:
-                required.append(name)
-            json_properties[name] = json_property
-        input_schema["properties"] = json_properties
-        if required:
-            input_schema["required"] = required
-        return input_schema
-    @staticmethod
-    def _tools_to_tool_config(
-        tools: Optional[List[ToolDefinition]], tool_choice: Optional[ToolChoice]
-    ) -> Optional[Dict]:
-        if not tools:
-            return None
-        bedrock_tools = []
-        for tool in tools:
-            tool_name = (
-                tool.tool_name
-                if isinstance(tool.tool_name, str)
-                else tool.tool_name.value
-            )
-            tool_spec = {
-                "toolSpec": {
-                    "name": tool_name,
-                    "inputSchema": {
-                        "json": BedrockInferenceAdapter._tool_parameters_to_input_schema(
-                            tool.parameters
-                        ),
-                    },
-                }
-            }
-            if tool.description:
-                tool_spec["toolSpec"]["description"] = tool.description
-            bedrock_tools.append(tool_spec)
-        tool_config = {
-            "tools": bedrock_tools,
-        }
-        if tool_choice:
-            tool_config["toolChoice"] = (
-                {"any": {}}
-                if tool_choice.value == ToolChoice.required
-                else {"auto": {}}
-            )
-        return tool_config
-    def chat_completion(
-        self,
-        model: str,
-        messages: List[Message],
-        sampling_params: Optional[SamplingParams] = SamplingParams(),
-        # zero-shot tool definitions as input to the model
-        tools: Optional[List[ToolDefinition]] = None,
-        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
-        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
-        stream: Optional[bool] = False,
-        logprobs: Optional[LogProbConfig] = None,
-    ) -> (
-        AsyncGenerator
-    ):  # Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
-        bedrock_model = self.map_to_provider_model(model)
-        inference_config = BedrockInferenceAdapter.get_bedrock_inference_config(
-            sampling_params
-        )
-        tool_config = BedrockInferenceAdapter._tools_to_tool_config(tools, tool_choice)
-        bedrock_messages, system_bedrock_messages = (
-            BedrockInferenceAdapter._messages_to_bedrock_messages(messages)
-        )
-        converse_api_params = {
-            "modelId": bedrock_model,
-            "messages": bedrock_messages,
-        }
-        if inference_config:
-            converse_api_params["inferenceConfig"] = inference_config
-        # Tool use is not supported in streaming mode
-        if tool_config and not stream:
-            converse_api_params["toolConfig"] = tool_config
-        if system_bedrock_messages:
-            converse_api_params["system"] = system_bedrock_messages
-        if not stream:
-            converse_api_res = self.client.converse(**converse_api_params)
-            output_message = BedrockInferenceAdapter._bedrock_message_to_message(
-                converse_api_res
-            )
-            yield ChatCompletionResponse(
-                completion_message=output_message,
-                logprobs=None,
-            )
-        else:
-            converse_stream_api_res = self.client.converse_stream(**converse_api_params)
-            event_stream = converse_stream_api_res["stream"]
-            for chunk in event_stream:
-                if "messageStart" in chunk:
-                    yield ChatCompletionResponseStreamChunk(
-                        event=ChatCompletionResponseEvent(
-                            event_type=ChatCompletionResponseEventType.start,
-                            delta="",
-                        )
-                    )
-                elif "contentBlockStart" in chunk:
-                    yield ChatCompletionResponseStreamChunk(
-                        event=ChatCompletionResponseEvent(
-                            event_type=ChatCompletionResponseEventType.progress,
-                            delta=ToolCallDelta(
-                                content=ToolCall(
-                                    tool_name=chunk["contentBlockStart"]["toolUse"][
-                                        "name"
-                                    ],
-                                    call_id=chunk["contentBlockStart"]["toolUse"][
-                                        "toolUseId"
-                                    ],
-                                ),
-                                parse_status=ToolCallParseStatus.started,
-                            ),
-                        )
-                    )
-                elif "contentBlockDelta" in chunk:
-                    if "text" in chunk["contentBlockDelta"]["delta"]:
-                        delta = chunk["contentBlockDelta"]["delta"]["text"]
-                    else:
-                        delta = ToolCallDelta(
-                            content=ToolCall(
-                                arguments=chunk["contentBlockDelta"]["delta"][
-                                    "toolUse"
-                                ]["input"]
-                            ),
-                            parse_status=ToolCallParseStatus.success,
-                        )
-                    yield ChatCompletionResponseStreamChunk(
-                        event=ChatCompletionResponseEvent(
-                            event_type=ChatCompletionResponseEventType.progress,
-                            delta=delta,
-                        )
-                    )
-                elif "contentBlockStop" in chunk:
-                    # Ignored
-                    pass
-                elif "messageStop" in chunk:
-                    stop_reason = (
-                        BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason(
-                            chunk["messageStop"]["stopReason"]
-                        )
-                    )
-                    yield ChatCompletionResponseStreamChunk(
-                        event=ChatCompletionResponseEvent(
-                            event_type=ChatCompletionResponseEventType.complete,
-                            delta="",
-                            stop_reason=stop_reason,
-                        )
-                    )
-                elif "metadata" in chunk:
-                    # Ignored
-                    pass
-                else:
-                    # Ignored
-                    pass
-    async def embeddings(
-        self,
-        model: str,
-        contents: List[InterleavedTextMedia],
-    ) -> EmbeddingsResponse:
-        raise NotImplementedError()
-def _create_bedrock_client(config: BedrockConfig) -> BaseClient:
-    retries_config = {
-        k: v
-        for k, v in dict(
-            total_max_attempts=config.total_max_attempts,
-            mode=config.retry_mode,
-        ).items()
-        if v is not None
-    }
-    config_args = {
-        k: v
-        for k, v in dict(
-            region_name=config.region_name,
-            retries=retries_config if retries_config else None,
-            connect_timeout=config.connect_timeout,
-            read_timeout=config.read_timeout,
-        ).items()
-        if v is not None
-    }
-    boto3_config = Config(**config_args)
-    session_args = {
-        k: v
-        for k, v in dict(
-            aws_access_key_id=config.aws_access_key_id,
-            aws_secret_access_key=config.aws_secret_access_key,
-            aws_session_token=config.aws_session_token,
-            region_name=config.region_name,
-            profile_name=config.profile_name,
-        ).items()
-        if v is not None
-    }
-    boto3_session = boto3.session.Session(**session_args)
-    return boto3_session.client("bedrock-runtime", config=boto3_config)

llama_stack/providers/adapters/inference/bedrock/config.py DELETED Viewed

@@ -1,55 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from typing import *  # noqa: F403
-from llama_models.schema_utils import json_schema_type
-from pydantic import BaseModel, Field
-@json_schema_type
-class BedrockConfig(BaseModel):
-    aws_access_key_id: Optional[str] = Field(
-        default=None,
-        description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
-    )
-    aws_secret_access_key: Optional[str] = Field(
-        default=None,
-        description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
-    )
-    aws_session_token: Optional[str] = Field(
-        default=None,
-        description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
-    )
-    region_name: Optional[str] = Field(
-        default=None,
-        description="The default AWS Region to use, for example, us-west-1 or us-west-2."
-        "Default use environment variable: AWS_DEFAULT_REGION",
-    )
-    profile_name: Optional[str] = Field(
-        default=None,
-        description="The profile name that contains credentials to use."
-        "Default use environment variable: AWS_PROFILE",
-    )
-    total_max_attempts: Optional[int] = Field(
-        default=None,
-        description="An integer representing the maximum number of attempts that will be made for a single request, "
-        "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
-    )
-    retry_mode: Optional[str] = Field(
-        default=None,
-        description="A string representing the type of retries Boto3 will perform."
-        "Default use environment variable: AWS_RETRY_MODE",
-    )
-    connect_timeout: Optional[float] = Field(
-        default=60,
-        description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
-        "The default is 60 seconds.",
-    )
-    read_timeout: Optional[float] = Field(
-        default=60,
-        description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
-        "The default is 60 seconds.",
-    )

llama_stack/providers/adapters/inference/databricks/config.py DELETED Viewed

@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from llama_models.schema_utils import json_schema_type
-from pydantic import BaseModel, Field
-@json_schema_type
-class DatabricksImplConfig(BaseModel):
-    url: str = Field(
-        default=None,
-        description="The URL for the Databricks model serving endpoint",
-    )
-    api_token: str = Field(
-        default=None,
-        description="The Databricks API token",
-    )

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl