llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/providers/remote/inference/nvidia/config.py
ADDED
@@ -0,0 +1,64 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import Field
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class NVIDIAConfig(RemoteInferenceProviderConfig):
+    """
+    Configuration for the NVIDIA NIM inference endpoint.
+
+    Attributes:
+        url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
+        api_key (str): The access key for the hosted NIM endpoints
+
+    There are two ways to access NVIDIA NIMs -
+     0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
+     1. Self-hosted: You can run NVIDIA NIMs on your own infrastructure
+
+    By default the configuration is set to use the hosted APIs. This requires
+    an API key which can be obtained from https://ngc.nvidia.com/.
+
+    By default the configuration will attempt to read the NVIDIA_API_KEY environment
+    variable to set the api_key. Please do not put your API key in code.
+
+    If you are using a self-hosted NVIDIA NIM, you can set the url to the
+    URL of your running NVIDIA NIM and do not need to set the api_key.
+    """
+
+    url: str = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+        description="A base url for accessing the NVIDIA NIM",
+    )
+    timeout: int = Field(
+        default=60,
+        description="Timeout for the HTTP requests",
+    )
+    append_api_version: bool = Field(
+        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
+        description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        api_key: str = "${env.NVIDIA_API_KEY:=}",
+        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "api_key": api_key,
+            "append_api_version": append_api_version,
+        }
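The two `default_factory` lambdas above make NVIDIAConfig read `NVIDIA_BASE_URL` and `NVIDIA_APPEND_API_VERSION` when the config object is created rather than at import time, and the boolean field is derived from a string comparison so that only an explicit "false" disables version appending. A minimal standalone sketch of that env-driven default pattern (the `DemoConfig` class and `DEMO_*` variable names are illustrative, not part of llama-stack):

import os

from pydantic import BaseModel, Field


class DemoConfig(BaseModel):
    # Same pattern as NVIDIAConfig: the environment is consulted at
    # instantiation time, not when the module is imported.
    url: str = Field(default_factory=lambda: os.getenv("DEMO_BASE_URL", "http://localhost:8000"))
    # Env vars are strings; anything other than "false" (any case) counts as true.
    append_api_version: bool = Field(
        default_factory=lambda: os.getenv("DEMO_APPEND_API_VERSION", "True").lower() != "false"
    )


os.environ["DEMO_APPEND_API_VERSION"] = "FALSE"
print(DemoConfig())  # url='http://localhost:8000' append_api_version=False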
llama_stack/providers/remote/inference/nvidia/nvidia.py
ADDED
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from . import NVIDIAConfig
+from .utils import _is_nvidia_hosted
+
+logger = get_logger(name=__name__, category="inference::nvidia")
+
+
+class NVIDIAInferenceAdapter(OpenAIMixin):
+    config: NVIDIAConfig
+
+    """
+    NVIDIA Inference Adapter for Llama Stack.
+    """
+
+    # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
+        "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
+        "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
+        "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
+    }
+
+    async def initialize(self) -> None:
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
+
+        if _is_nvidia_hosted(self.config):
+            if not self.config.auth_credential:
+                raise RuntimeError(
+                    "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
+                )
+
+    def get_api_key(self) -> str:
+        """
+        Get the API key for OpenAI mixin.
+
+        :return: The NVIDIA API key
+        """
+        if self.config.auth_credential:
+            return self.config.auth_credential.get_secret_value()
+
+        if not _is_nvidia_hosted(self.config):
+            return "NO KEY REQUIRED"
+
+        return None
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+
+        :return: The NVIDIA API base URL
+        """
+        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
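Hosted-endpoint detection (the `_is_nvidia_hosted` helper from `utils.py`, shown in the next hunk) drives both the `initialize()` credential check and `get_api_key()`. A condensed standalone sketch of that decision logic, collapsing the two methods into one hypothetical function (`resolve_api_key` is not a real llama-stack name, and a plain string stands in for the pydantic SecretStr credential):

def resolve_api_key(url: str, credential: str | None) -> str:
    hosted = "integrate.api.nvidia.com" in url  # same test as _is_nvidia_hosted
    if credential:
        return credential  # an explicit key always wins
    if not hosted:
        return "NO KEY REQUIRED"  # self-hosted NIMs do not need a key
    # in the adapter, initialize() raises before get_api_key() is consulted
    raise RuntimeError("API key is required for hosted NVIDIA NIM.")


assert resolve_api_key("http://localhost:8000", None) == "NO KEY REQUIRED"
assert resolve_api_key("https://integrate.api.nvidia.com", "nvapi-xyz") == "nvapi-xyz"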
llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py}
RENAMED
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from . import NVIDIAConfig
 
 
-
-
-port: int = 9999
+def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
+    return "integrate.api.nvidia.com" in config.url
llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py
RENAMED
@@ -4,14 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from .config import OllamaImplConfig
 
-from .config import VLLMConfig
 
+async def get_adapter_impl(config: OllamaImplConfig, _deps):
+    from .ollama import OllamaInferenceAdapter
 
-
-from .vllm import VLLMInferenceImpl
-
-    impl = VLLMInferenceImpl(config)
+    impl = OllamaInferenceAdapter(config=config)
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/ollama/config.py
ADDED
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import Field, SecretStr
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+
+DEFAULT_OLLAMA_URL = "http://localhost:11434"
+
+
+class OllamaImplConfig(RemoteInferenceProviderConfig):
+    auth_credential: SecretStr | None = Field(default=None, exclude=True)
+
+    url: str = DEFAULT_OLLAMA_URL
+
+    @classmethod
+    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
+        return {
+            "url": url,
+        }
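Note that `sample_run_config` returns `${env.VAR:=default}` placeholder strings rather than resolved values; substitution happens later, when the stack materializes a run config from a distribution template. Called directly (assuming the class is importable), it simply echoes the placeholder:

>>> OllamaImplConfig.sample_run_config()
{'url': '${env.OLLAMA_URL:=http://localhost:11434}'}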
@@ -0,0 +1,102 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+import asyncio
+
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.apis.common.errors import UnsupportedModelError
+from llama_stack.apis.models import Model
+from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import (
+    HealthResponse,
+    HealthStatus,
+)
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+logger = get_logger(name=__name__, category="inference::ollama")
+
+
+class OllamaInferenceAdapter(OpenAIMixin):
+    config: OllamaImplConfig
+
+    # automatically set by the resolver when instantiating the provider
+    __provider_id__: str
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "all-minilm:l6-v2": {
+            "embedding_dimension": 384,
+            "context_length": 512,
+        },
+        "nomic-embed-text:latest": {
+            "embedding_dimension": 768,
+            "context_length": 8192,
+        },
+        "nomic-embed-text:v1.5": {
+            "embedding_dimension": 768,
+            "context_length": 8192,
+        },
+        "nomic-embed-text:137m-v1.5-fp16": {
+            "embedding_dimension": 768,
+            "context_length": 8192,
+        },
+    }
+
+    download_images: bool = True
+    _clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
+
+    @property
+    def ollama_client(self) -> AsyncOllamaClient:
+        # ollama client attaches itself to the current event loop (sadly?)
+        loop = asyncio.get_running_loop()
+        if loop not in self._clients:
+            self._clients[loop] = AsyncOllamaClient(host=self.config.url)
+        return self._clients[loop]
+
+    def get_api_key(self):
+        return "NO KEY REQUIRED"
+
+    def get_base_url(self):
+        return self.config.url.rstrip("/") + "/v1"
+
+    async def initialize(self) -> None:
+        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+        r = await self.health()
+        if r["status"] == HealthStatus.ERROR:
+            logger.warning(
+                f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
+            )
+
+    async def health(self) -> HealthResponse:
+        """
+        Performs a health check by verifying connectivity to the Ollama server.
+        This method is used by initialize() and the Provider API to verify that the service is running
+        correctly.
+        Returns:
+            HealthResponse: A dictionary containing the health status.
+        """
+        try:
+            await self.ollama_client.ps()
+            return HealthResponse(status=HealthStatus.OK)
+        except Exception as e:
+            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
+
+    async def shutdown(self) -> None:
+        self._clients.clear()
+
+    async def register_model(self, model: Model) -> Model:
+        if await self.check_model_availability(model.provider_model_id):
+            return model
+        elif await self.check_model_availability(f"{model.provider_model_id}:latest"):
+            model.provider_resource_id = f"{model.provider_model_id}:latest"
+            logger.warning(
+                f"Imprecise provider resource id was used but 'latest' is available in Ollama - using '{model.provider_model_id}'"
+            )
+            return model
+
+        raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))

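The `ollama_client` property above caches one `AsyncOllamaClient` per running event loop, because the client binds to the loop it was created on. The same pattern in isolation, as a generic sketch rather than llama-stack code:

```python
import asyncio


class LoopScopedClient:
    """Generic sketch of the per-event-loop cache used by ollama_client."""

    def __init__(self, factory):
        self._factory = factory
        self._clients: dict[asyncio.AbstractEventLoop, object] = {}

    @property
    def client(self):
        loop = asyncio.get_running_loop()  # raises if called outside a running loop
        if loop not in self._clients:
            # first use on this loop: build a fresh client bound to it
            self._clients[loop] = self._factory()
        return self._clients[loop]
```
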
llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py RENAMED
@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .config import
+from .config import OpenAIConfig
 
 
-async def get_adapter_impl(config:
-    from .
+async def get_adapter_impl(config: OpenAIConfig, _deps):
+    from .openai import OpenAIInferenceAdapter
 
-    impl =
+    impl = OpenAIInferenceAdapter(config=config)
     await impl.initialize()
     return impl

@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+
+
+class OpenAIProviderDataValidator(BaseModel):
+    openai_api_key: str | None = Field(
+        default=None,
+        description="API key for OpenAI models",
+    )
+
+
+@json_schema_type
+class OpenAIConfig(RemoteInferenceProviderConfig):
+    base_url: str = Field(
+        default="https://api.openai.com/v1",
+        description="Base URL for OpenAI API",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        api_key: str = "${env.OPENAI_API_KEY:=}",
+        base_url: str = "${env.OPENAI_BASE_URL:=https://api.openai.com/v1}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "api_key": api_key,
+            "base_url": base_url,
+        }

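The validator above describes the per-request provider-data payload. A hedged sketch of parsing such a payload with pydantic v2 (the header transport itself is outside this diff):

```python
# Sketch: validating an X-LlamaStack-Provider-Data payload with the
# OpenAIProviderDataValidator defined above.
import json

from llama_stack.providers.remote.inference.openai.config import OpenAIProviderDataValidator

payload = '{"openai_api_key": "sk-example"}'  # illustrative key, not a real secret
data = OpenAIProviderDataValidator.model_validate(json.loads(payload))
print(data.openai_api_key)  # sk-example
```
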
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import OpenAIConfig
+
+logger = get_logger(name=__name__, category="inference::openai")
+
+
+#
+# This OpenAI adapter implements Inference methods using OpenAIMixin
+#
+class OpenAIInferenceAdapter(OpenAIMixin):
+    """
+    OpenAI Inference Adapter for Llama Stack.
+    """
+
+    config: OpenAIConfig
+
+    provider_data_api_key_field: str = "openai_api_key"
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
+        "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
+    }
+
+    def get_base_url(self) -> str:
+        """
+        Get the OpenAI API base URL.
+
+        Returns the OpenAI API base URL from the configuration.
+        """
+        return self.config.base_url

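Because `get_base_url()` simply returns `config.base_url`, the adapter can be pointed at any OpenAI-compatible endpoint. A minimal sketch, assuming `OpenAIConfig` requires no other fields:

```python
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig

# Route traffic through an OpenAI-compatible proxy instead of api.openai.com
# (proxy URL is illustrative).
cfg = OpenAIConfig(base_url="https://my-proxy.example.com/v1")
# OpenAIInferenceAdapter.get_base_url() returns exactly this value.
print(cfg.base_url)
```
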
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pydantic import BaseModel
+
+from .config import PassthroughImplConfig
+
+
+class PassthroughProviderDataValidator(BaseModel):
+    url: str
+    api_key: str
+
+
+async def get_adapter_impl(config: PassthroughImplConfig, _deps):
+    from .passthrough import PassthroughInferenceAdapter
+
+    assert isinstance(config, PassthroughImplConfig), f"Unexpected config type: {type(config)}"
+    impl = PassthroughInferenceAdapter(config)
+    await impl.initialize()
+    return impl

@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import Field, SecretStr
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class PassthroughImplConfig(RemoteInferenceProviderConfig):
+    url: str = Field(
+        default=None,
+        description="The URL for the passthrough endpoint",
+    )
+
+    api_key: SecretStr | None = Field(
+        default=None,
+        description="API Key for the passthrough endpoint",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "api_key": api_key,
+        }

@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+from typing import Any
+
+from llama_stack_client import AsyncLlamaStackClient
+
+from llama_stack.apis.inference import (
+    Inference,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.apis.models import Model
+from llama_stack.core.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+
+from .config import PassthroughImplConfig
+
+
+class PassthroughInferenceAdapter(Inference):
+    def __init__(self, config: PassthroughImplConfig) -> None:
+        ModelRegistryHelper.__init__(self)
+        self.config = config
+
+    async def unregister_model(self, model_id: str) -> None:
+        pass
+
+    async def register_model(self, model: Model) -> Model:
+        return model
+
+    def _get_client(self) -> AsyncLlamaStackClient:
+        passthrough_url = None
+        passthrough_api_key = None
+        provider_data = None
+
+        if self.config.url is not None:
+            passthrough_url = self.config.url
+        else:
+            provider_data = self.get_request_provider_data()
+            if provider_data is None or not provider_data.passthrough_url:
+                raise ValueError(
+                    'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+                )
+            passthrough_url = provider_data.passthrough_url
+
+        if self.config.api_key is not None:
+            passthrough_api_key = self.config.api_key.get_secret_value()
+        else:
+            provider_data = self.get_request_provider_data()
+            if provider_data is None or not provider_data.passthrough_api_key:
+                raise ValueError(
+                    'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+                )
+            passthrough_api_key = provider_data.passthrough_api_key
+
+        return AsyncLlamaStackClient(
+            base_url=passthrough_url,
+            api_key=passthrough_api_key,
+            provider_data=provider_data,
+        )
+
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()
+
+    async def openai_completion(
+        self,
+        params: OpenAICompletionRequestWithExtraBody,
+    ) -> OpenAICompletion:
+        client = self._get_client()
+        model_obj = await self.model_store.get_model(params.model)
+
+        params = params.model_copy()
+        params.model = model_obj.provider_resource_id
+
+        request_params = params.model_dump(exclude_none=True)
+
+        return await client.inference.openai_completion(**request_params)
+
+    async def openai_chat_completion(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody,
+    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
+        client = self._get_client()
+        model_obj = await self.model_store.get_model(params.model)
+
+        params = params.model_copy()
+        params.model = model_obj.provider_resource_id
+
+        request_params = params.model_dump(exclude_none=True)
+
+        return await client.inference.openai_chat_completion(**request_params)
+
+    def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
+        json_params = {}
+        for key, value in request_params.items():
+            json_input = convert_pydantic_to_json_value(value)
+            if isinstance(json_input, dict):
+                json_input = {k: v for k, v in json_input.items() if v is not None}
+            elif isinstance(json_input, list):
+                json_input = [x for x in json_input if x is not None]
+                new_input = []
+                for x in json_input:
+                    if isinstance(x, dict):
+                        x = {k: v for k, v in x.items() if v is not None}
+                    new_input.append(x)
+                json_input = new_input
+
+            json_params[key] = json_input
+
+        return json_params

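When `url` and `api_key` are absent from the config, `_get_client()` expects them per request via `get_request_provider_data()`. A sketch of building the header that the error messages above describe; the key names come from those messages, while the URL and key values are illustrative:

```python
import json

# Per-request credentials for the passthrough provider; the adapter reads
# these when config.url / config.api_key are unset.
provider_data = {
    "passthrough_url": "https://downstream.example.com",
    "passthrough_api_key": "example-key",
}
headers = {"X-LlamaStack-Provider-Data": json.dumps(provider_data)}
# attach `headers` to any request sent to the llama-stack server
```
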
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import RunpodImplConfig
+
+
+async def get_adapter_impl(config: RunpodImplConfig, _deps):
+    from .runpod import RunpodInferenceAdapter
+
+    assert isinstance(config, RunpodImplConfig), f"Unexpected config type: {type(config)}"
+    impl = RunpodInferenceAdapter(config=config)
+    await impl.initialize()
+    return impl

@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import Field, SecretStr
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class RunpodImplConfig(RemoteInferenceProviderConfig):
+    url: str | None = Field(
+        default=None,
+        description="The URL for the Runpod model serving endpoint",
+    )
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        alias="api_token",
+        description="The API token",
+    )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
+        return {
+            "url": "${env.RUNPOD_URL:=}",
+            "api_token": "${env.RUNPOD_API_TOKEN}",
+        }

@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+
+from llama_stack.apis.inference import (
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import RunpodImplConfig
+
+
+class RunpodInferenceAdapter(OpenAIMixin):
+    """
+    Adapter for RunPod's OpenAI-compatible API endpoints.
+    Supports vLLM on serverless endpoints, whether self-hosted or public.
+    Works with any RunPod endpoint that supports the OpenAI-compatible API.
+    """
+
+    config: RunpodImplConfig
+
+    def get_base_url(self) -> str:
+        """Get base URL for OpenAI client."""
+        return self.config.url
+
+    async def openai_chat_completion(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody,
+    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
+        """Override to add RunPod-specific stream_options requirement."""
+        params = params.model_copy()
+
+        if params.stream and not params.stream_options:
+            params.stream_options = {"include_usage": True}
+
+        return await super().openai_chat_completion(params)

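The override only matters for streaming calls: RunPod needs `stream_options={"include_usage": True}` for token usage to appear in the final chunk. A hedged sketch of a request that triggers it; the field names `model`, `messages`, and `stream` are taken from the code above, while the model id and message shape are illustrative:

```python
from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody

params = OpenAIChatCompletionRequestWithExtraBody(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,  # stream_options intentionally left unset
)
# RunpodInferenceAdapter.openai_chat_completion() copies params and sets
# params.stream_options = {"include_usage": True} before delegating to
# OpenAIMixin.
```
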
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import SambaNovaImplConfig
+
+
+async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
+    from .sambanova import SambaNovaInferenceAdapter
+
+    assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
+    impl = SambaNovaInferenceAdapter(config=config)
+    await impl.initialize()
+    return impl