PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/providers/remote/inference/sambanova/config.py ADDED Viewed

@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import BaseModel, Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+# Pydantic model declaring a single optional field, `sambanova_api_key`,
+# which defaults to None when no key is supplied.
+# NOTE(review): presumably validates per-request provider data carrying the
+# caller's SambaNova key — confirm against the provider registry.
+class SambaNovaProviderDataValidator(BaseModel):
+    sambanova_api_key: str | None = Field(
+        default=None,
+        description="Sambanova Cloud API key",
+    )
+# Configuration for the SambaNova remote inference provider. Only `url` is
+# declared here; any other fields come from RemoteInferenceProviderConfig.
+@json_schema_type
+class SambaNovaImplConfig(RemoteInferenceProviderConfig):
+    # Base URL of the SambaNova API; defaults to the public endpoint.
+    url: str = Field(
+        default="https://api.sambanova.ai/v1",
+        description="The URL for the SambaNova AI server",
+    )
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        """
+        Return a sample run configuration for this provider.
+        :param api_key: value stored under "api_key"; by default an `${env...}` placeholder string
+        :param kwargs: accepted but not used
+        :return: mapping with the default SambaNova URL and the given API key
+        """
+        return {
+            "url": "https://api.sambanova.ai/v1",
+            "api_key": api_key,
+        }

llama_stack/providers/remote/inference/sambanova/sambanova.py ADDED Viewed

@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import SambaNovaImplConfig
+# SambaNova adapter: adds only a base URL and a few class-level settings on
+# top of OpenAIMixin.
+class SambaNovaInferenceAdapter(OpenAIMixin):
+    config: SambaNovaImplConfig
+    # Name of the provider-data field that carries a caller-supplied API key
+    # (assumed to be read by OpenAIMixin — TODO confirm).
+    provider_data_api_key_field: str = "sambanova_api_key"
+    download_images: bool = True  # SambaNova does not support image downloads server-side, perform them on the client
+    """
+    SambaNova Inference Adapter for Llama Stack.
+    """
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+        :return: The SambaNova base URL, taken from `config.url`
+        """
+        return self.config.url

llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py RENAMED Viewed

@@ -4,16 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Union
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
-from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
 async def get_adapter_impl(
-    config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig],
+    config: InferenceAPIImplConfig | InferenceEndpointImplConfig | TGIImplConfig,
     _deps,
 ):
+    from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
     if isinstance(config, TGIImplConfig):
         impl = TGIAdapter()
     elif isinstance(config, InferenceAPIImplConfig):

llama_stack/providers/remote/inference/tgi/config.py ADDED Viewed

@@ -0,0 +1,76 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from pydantic import BaseModel, Field, SecretStr
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+# Configuration for a TGI serving endpoint. `url` has no default, so a value
+# must be supplied when the model is constructed.
+@json_schema_type
+class TGIImplConfig(RemoteInferenceProviderConfig):
+    # Optional credential; exclude=True marks the field as excluded from
+    # serialized output.
+    auth_credential: SecretStr | None = Field(default=None, exclude=True)
+    url: str = Field(
+        description="The URL for the TGI serving endpoint",
+    )
+    @classmethod
+    def sample_run_config(
+        cls,
+        url: str = "${env.TGI_URL:=}",
+        **kwargs,
+    ):
+        """
+        Return a sample run configuration containing only "url".
+        :param url: value stored under "url"; by default an `${env...}` placeholder string
+        :param kwargs: accepted but not used
+        """
+        return {
+            "url": url,
+        }
+# Configuration for a Hugging Face Inference Endpoint: a required endpoint
+# name and an optional access token (None when not provided).
+@json_schema_type
+class InferenceEndpointImplConfig(BaseModel):
+    endpoint_name: str = Field(
+        description="The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided.",
+    )
+    api_token: SecretStr | None = Field(
+        default=None,
+        description="Your Hugging Face user access token (will default to locally saved token if not provided)",
+    )
+    @classmethod
+    def sample_run_config(
+        cls,
+        endpoint_name: str = "${env.INFERENCE_ENDPOINT_NAME}",
+        api_token: str = "${env.HF_API_TOKEN}",
+        **kwargs,
+    ):
+        """
+        Return a sample run configuration with "endpoint_name" and "api_token".
+        Both defaults are `${env...}` placeholder strings; `kwargs` is accepted but not used.
+        """
+        return {
+            "endpoint_name": endpoint_name,
+            "api_token": api_token,
+        }
+# Configuration for the Hugging Face Inference API: a required Hub model ID
+# and an optional access token (None when not provided).
+@json_schema_type
+class InferenceAPIImplConfig(BaseModel):
+    huggingface_repo: str = Field(
+        description="The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct')",
+    )
+    api_token: SecretStr | None = Field(
+        default=None,
+        description="Your Hugging Face user access token (will default to locally saved token if not provided)",
+    )
+    @classmethod
+    def sample_run_config(
+        cls,
+        repo: str = "${env.INFERENCE_MODEL}",
+        api_token: str = "${env.HF_API_TOKEN}",
+        **kwargs,
+    ):
+        """
+        Return a sample run configuration with "huggingface_repo" and "api_token".
+        Note the parameter is named `repo` but is stored under the "huggingface_repo" key.
+        Both defaults are `${env...}` placeholder strings; `kwargs` is accepted but not used.
+        """
+        return {
+            "huggingface_repo": repo,
+            "api_token": api_token,
+        }

llama_stack/providers/remote/inference/tgi/tgi.py ADDED Viewed

@@ -0,0 +1,85 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from collections.abc import Iterable
+from huggingface_hub import AsyncInferenceClient, HfApi
+from pydantic import SecretStr
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
+log = get_logger(name=__name__, category="inference::tgi")
+# Shared base for the Hugging Face-backed adapters in this module. The
+# attributes declared here are assigned by each subclass's `initialize`.
+class _HfAdapter(OpenAIMixin):
+    url: str
+    api_key: SecretStr
+    hf_client: AsyncInferenceClient
+    max_tokens: int
+    model_id: str
+    overwrite_completion_id = True  # TGI always returns id=""
+    def get_api_key(self):
+        # Returns a fixed placeholder; the `api_key` attribute is not read here.
+        return "NO KEY REQUIRED"
+    def get_base_url(self):
+        # The base URL is whatever `initialize` stored in `self.url`.
+        return self.url
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        # Exactly one model is reported: the one recorded in `self.model_id`.
+        return [self.model_id]
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        # Embeddings are not implemented for these adapters; always raises.
+        raise NotImplementedError()
+class TGIAdapter(_HfAdapter):
+    async def initialize(self, config: TGIImplConfig) -> None:
+        """
+        Connect to the TGI server named by `config.url` and record its details.
+        Queries the endpoint info to fill in `max_tokens` and `model_id`, then
+        stores the OpenAI-compatible base URL (`<url>/v1`) and a placeholder key.
+        :raises ValueError: if `config.url` is empty
+        """
+        tgi_url = config.url
+        if not tgi_url:
+            raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
+        log.info(f"Initializing TGI client with url={config.url}")
+        self.hf_client = AsyncInferenceClient(model=tgi_url, provider="hf-inference")
+        info = await self.hf_client.get_endpoint_info()
+        self.max_tokens = info["max_total_tokens"]
+        self.model_id = info["model_id"]
+        # Drop any trailing slash so the joined URL has exactly one separator.
+        trimmed_url = tgi_url.rstrip("/")
+        self.url = f"{trimmed_url}/v1"
+        self.api_key = SecretStr("NO_KEY")
+class InferenceAPIAdapter(_HfAdapter):
+    async def initialize(self, config: InferenceAPIImplConfig) -> None:
+        """
+        Create a Hugging Face Inference API client for `config.huggingface_repo`
+        and record the endpoint's `max_tokens` and `model_id`.
+        :param config: adapter configuration; `api_token` may be None
+        """
+        # `api_token` is optional and defaults to None, so unwrap it only when
+        # present. Passing token=None leaves token resolution to
+        # huggingface_hub, matching the field description ("will default to
+        # locally saved token if not provided").
+        token = config.api_token.get_secret_value() if config.api_token is not None else None
+        self.hf_client = AsyncInferenceClient(model=config.huggingface_repo, token=token)
+        endpoint_info = await self.hf_client.get_endpoint_info()
+        self.max_tokens = endpoint_info["max_total_tokens"]
+        self.model_id = endpoint_info["model_id"]
+        # TODO: how do we set url for this?
+class InferenceEndpointAdapter(_HfAdapter):
+    async def initialize(self, config: InferenceEndpointImplConfig) -> None:
+        """
+        Look up the Hugging Face Inference Endpoint named by
+        `config.endpoint_name`, wait for it to be ready, and record its client,
+        model repository and token limit.
+        :param config: adapter configuration; `api_token` may be None
+        """
+        # `api_token` is optional and defaults to None, so unwrap it only when
+        # present. Passing token=None leaves token resolution to
+        # huggingface_hub, matching the field description ("will default to
+        # locally saved token if not provided").
+        token = config.api_token.get_secret_value() if config.api_token is not None else None
+        # Get the inference endpoint details
+        api = HfApi(token=token)
+        endpoint = api.get_inference_endpoint(config.endpoint_name)
+        # Wait for the endpoint to be ready (if not already)
+        # NOTE(review): this appears to be a synchronous wait inside a
+        # coroutine — confirm it cannot stall the event loop for up to 60s.
+        endpoint.wait(timeout=60)
+        # Initialize the adapter
+        self.hf_client = endpoint.async_client
+        self.model_id = endpoint.repository
+        self.max_tokens = int(endpoint.raw["model"]["image"]["custom"]["env"]["MAX_TOTAL_TOKENS"])
+        # TODO: how do we set url for this?

llama_stack/providers/{adapters → remote}/inference/together/__init__.py RENAMED Viewed

@@ -4,15 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from pydantic import BaseModel
 from .config import TogetherImplConfig
+# Pydantic model with one required string field, `together_api_key` (no
+# default is declared, so validation fails when it is missing).
+class TogetherProviderDataValidator(BaseModel):
+    together_api_key: str
+# Build a TogetherInferenceAdapter from `config`, await its initialize(),
+# and return it. `_deps` is not used in this function.
 async def get_adapter_impl(config: TogetherImplConfig, _deps):
     from .together import TogetherInferenceAdapter
-    assert isinstance(
-        config, TogetherImplConfig
-    ), f"Unexpected config type: {type(config)}"
-    impl = TogetherInferenceAdapter(config)
+    assert isinstance(config, TogetherImplConfig), f"Unexpected config type: {type(config)}"
+    impl = TogetherInferenceAdapter(config=config)
     await impl.initialize()
     return impl

llama_stack/providers/remote/inference/together/config.py ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+# Configuration for the Together remote inference provider. Only `url` is
+# declared here; any other fields come from RemoteInferenceProviderConfig.
+@json_schema_type
+class TogetherImplConfig(RemoteInferenceProviderConfig):
+    # Base URL of the Together API; defaults to the public endpoint.
+    url: str = Field(
+        default="https://api.together.xyz/v1",
+        description="The URL for the Together AI server",
+    )
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+        """
+        Return a sample run configuration with the default Together URL and an
+        `${env...}` placeholder string for the API key. `kwargs` is accepted but not used.
+        """
+        return {
+            "url": "https://api.together.xyz/v1",
+            "api_key": "${env.TOGETHER_API_KEY:=}",
+        }

llama_stack/providers/remote/inference/together/together.py ADDED Viewed

@@ -0,0 +1,102 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from collections.abc import Iterable
+from together import AsyncTogether
+from together.constants import BASE_URL
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
+from llama_stack.apis.models import Model
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import TogetherImplConfig
+logger = get_logger(name=__name__, category="inference::together")
+class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
+    config: TogetherImplConfig
+    # Embedding dimension and context length for the embedding models listed here.
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
+        "BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
+        "BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
+        "Alibaba-NLP/gte-modernbert-base": {"embedding_dimension": 768, "context_length": 8192},
+        "intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
+    }
+    _model_cache: dict[str, Model] = {}
+    provider_data_api_key_field: str = "together_api_key"
+    def get_base_url(self):
+        # NOTE(review): this returns the Together SDK's BASE_URL constant;
+        # `config.url` is not read here — confirm that is intended.
+        return BASE_URL
+    def _get_client(self) -> AsyncTogether:
+        """
+        Build a Together SDK client.
+        A non-empty key from `config.auth_credential` is used when available;
+        otherwise the key is taken from the request's provider data.
+        :raises ValueError: if neither source yields a key
+        """
+        credential = self.config.auth_credential
+        configured_key = credential.get_secret_value() if credential else None
+        if configured_key:
+            return AsyncTogether(api_key=configured_key)
+        # No usable key in the config: fall back to the per-request provider data.
+        request_data = self.get_request_provider_data()
+        if request_data is None or not request_data.together_api_key:
+            raise ValueError(
+                'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": <your api key>}'
+            )
+        return AsyncTogether(api_key=request_data.together_api_key)
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        # Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
+        listed_models = await self._get_client().models.list()
+        return [entry.id for entry in listed_models]
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """
+        Create embeddings through Together, working around the places where its
+        endpoint differs from the standard OpenAI embeddings endpoint:
+         - some models return no usage information (placeholder -1 values are substituted)
+         - the user param is rejected with 400 Unrecognized request arguments supplied: user
+         - the dimensions param is rejected with 400 Unrecognized request arguments supplied: dimensions
+        :raises ValueError: if `params.user` or `params.dimensions` is set
+        """
+        # Together support ticket #13332 -> will not fix
+        if params.user is not None:
+            raise ValueError("Together's embeddings endpoint does not support user param.")
+        # Together support ticket #13333 -> escalated
+        if params.dimensions is not None:
+            raise ValueError("Together's embeddings endpoint does not support dimensions param.")
+        response = await self.client.embeddings.create(
+            model=await self._get_provider_model_id(params.model),
+            input=params.input,
+            encoding_format=params.encoding_format,
+        )
+        # Echo back the model id the caller supplied rather than the provider's own id.
+        response.model = params.model
+        # Together support ticket #13330 -> escalated
+        #  - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information
+        if getattr(response, "usage", None) is None:
+            logger.warning(
+                f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s."
+            )
+            response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
+        return response  # type: ignore[no-any-return]

llama_stack/providers/remote/inference/vertexai/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .config import VertexAIConfig
+async def get_adapter_impl(config: VertexAIConfig, _deps):
+    """Create a VertexAIInferenceAdapter for `config`, initialize it, and return it. `_deps` is unused."""
+    from .vertexai import VertexAIInferenceAdapter
+    adapter = VertexAIInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter

llama_stack/providers/remote/inference/vertexai/config.py ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import BaseModel, Field, SecretStr
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+# Pydantic model with two optional string fields, `vertex_project` and
+# `vertex_location`; both default to None.
+# NOTE(review): presumably validates per-request provider data — confirm
+# against the provider registry.
+class VertexAIProviderDataValidator(BaseModel):
+    vertex_project: str | None = Field(
+        default=None,
+        description="Google Cloud project ID for Vertex AI",
+    )
+    vertex_location: str | None = Field(
+        default=None,
+        description="Google Cloud location for Vertex AI (e.g., us-central1)",
+    )
+# Configuration for the Vertex AI remote inference provider: a required
+# project ID and a location that defaults to "us-central1".
+@json_schema_type
+class VertexAIConfig(RemoteInferenceProviderConfig):
+    # Optional credential; exclude=True marks the field as excluded from
+    # serialized output.
+    auth_credential: SecretStr | None = Field(default=None, exclude=True)
+    project: str = Field(
+        description="Google Cloud project ID for Vertex AI",
+    )
+    location: str = Field(
+        default="us-central1",
+        description="Google Cloud location for Vertex AI",
+    )
+    @classmethod
+    def sample_run_config(
+        cls,
+        project: str = "${env.VERTEX_AI_PROJECT:=}",
+        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        """
+        Return a sample run configuration with "project" and "location".
+        Both defaults are `${env...}` placeholder strings; `kwargs` is accepted but not used.
+        """
+        return {
+            "project": project,
+            "location": location,
+        }

llama_stack/providers/remote/inference/vertexai/vertexai.py ADDED Viewed

@@ -0,0 +1,54 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from collections.abc import Iterable
+import google.auth.transport.requests
+from google.auth import default
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import VertexAIConfig
+class VertexAIInferenceAdapter(OpenAIMixin):
+    config: VertexAIConfig
+    # NOTE(review): the provider-data "API key" field is set to the project
+    # field name, not a credential — confirm this is intended.
+    provider_data_api_key_field: str = "vertex_project"
+    def get_api_key(self) -> str:
+        """
+        Return an access token obtained via Application Default Credentials.
+        Vertex AI authenticates with ADC rather than API keys, so the token
+        fetched here stands in for the API key of the OpenAI-compatible client.
+        :return: the refreshed access token, or "" if any step fails
+        """
+        cloud_platform_scope = "https://www.googleapis.com/auth/cloud-platform"
+        try:
+            # Resolve default credentials - will read from GOOGLE_APPLICATION_CREDENTIALS
+            adc_credentials, _ = default(scopes=[cloud_platform_scope])
+            auth_request = google.auth.transport.requests.Request()
+            adc_credentials.refresh(auth_request)
+            return str(adc_credentials.token)
+        except Exception:
+            # Best effort: with no usable credentials, hand back an empty string and let the env work with ADC directly
+            return ""
+    def get_base_url(self) -> str:
+        """
+        Build the Vertex AI OpenAI-compatible endpoint URL from the configured
+        location and project.
+        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
+        """
+        location = self.config.location
+        project = self.config.project
+        host = f"https://{location}-aiplatform.googleapis.com"
+        return f"{host}/v1/projects/{project}/locations/{location}/endpoints/openapi"
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        Return a hardcoded list of model IDs.
+        VertexAI doesn't currently offer a way to query the available models
+        from Google's Model Garden, so the list is fixed for now.
+        :return: An iterable of model IDs
+        """
+        return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]

llama_stack/providers/remote/inference/vllm/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from pydantic import BaseModel
+from .config import VLLMInferenceAdapterConfig
+# Pydantic model with one optional string field, `vllm_api_token`, which
+# defaults to None.
+class VLLMProviderDataValidator(BaseModel):
+    vllm_api_token: str | None = None
+async def get_adapter_impl(config: VLLMInferenceAdapterConfig, _deps):
+    """Create a VLLMInferenceAdapter for `config`, initialize it, and return it. `_deps` is unused."""
+    from .vllm import VLLMInferenceAdapter
+    assert isinstance(config, VLLMInferenceAdapterConfig), f"Unexpected config type: {type(config)}"
+    adapter = VLLMInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter

llama_stack/providers/remote/inference/vllm/config.py ADDED Viewed

@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from pathlib import Path
+from pydantic import Field, SecretStr, field_validator
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+# Configuration for the vLLM remote inference provider.
+@json_schema_type
+class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
+    url: str | None = Field(
+        default=None,
+        description="The URL for the vLLM model serving endpoint",
+    )
+    max_tokens: int = Field(
+        default=4096,
+        description="Maximum number of tokens to generate.",
+    )
+    # Populated from the "api_token" key via the field alias.
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        alias="api_token",
+        description="The API token",
+    )
+    tls_verify: bool | str = Field(
+        default=True,
+        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
+    )
+    @field_validator("tls_verify")
+    @classmethod
+    def validate_tls_verify(cls, v):
+        """
+        Normalize and check the `tls_verify` value.
+        Non-string values are returned unchanged. The strings "true" and
+        "false" (any case) become booleans. Any other string must name an
+        existing certificate file and is returned unchanged.
+        :raises ValueError: if a path string does not exist or is not a file
+        """
+        if not isinstance(v, str):
+            return v
+        # The sample config supplies "${env.VLLM_TLS_VERIFY:=true}", so the
+        # value can arrive as the text "true"/"false"; convert it to a real
+        # boolean instead of rejecting it as a missing certificate file.
+        lowered = v.lower()
+        if lowered in ("true", "false"):
+            return lowered == "true"
+        # Otherwise, treat it as a cert path
+        cert_path = Path(v).expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"TLS certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"TLS certificate path is not a file: {v}")
+        return v
+    @classmethod
+    def sample_run_config(
+        cls,
+        url: str = "${env.VLLM_URL:=}",
+        **kwargs,
+    ):
+        """
+        Return a sample run configuration. Every value except `url` is a fixed
+        `${env...}` placeholder string; `kwargs` is accepted but not used.
+        """
+        return {
+            "url": url,
+            "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
+            "api_token": "${env.VLLM_API_TOKEN:=fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
+        }

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl