PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py RENAMED Viewed

@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pydantic import BaseModel
+from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
-class SampleConfig(BaseModel):
-    host: str = "localhost"
-    port: int = 9999
+class BedrockConfig(BedrockBaseConfig):
+    pass

llama_stack/providers/remote/inference/bedrock/models.py ADDED Viewed

@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+)
+SAFETY_MODELS_ENTRIES = []
+# https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_hf_repo_model_entry(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+] + SAFETY_MODELS_ENTRIES

llama_stack/providers/remote/inference/cerebras/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .config import CerebrasImplConfig
+async def get_adapter_impl(config: CerebrasImplConfig, _deps):
+    from .cerebras import CerebrasInferenceAdapter
+    assert isinstance(config, CerebrasImplConfig), f"Unexpected config type: {type(config)}"
+    impl = CerebrasInferenceAdapter(config=config)
+    await impl.initialize()
+    return impl

llama_stack/providers/remote/inference/cerebras/cerebras.py ADDED Viewed

@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from urllib.parse import urljoin
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import CerebrasImplConfig
+class CerebrasInferenceAdapter(OpenAIMixin):
+    config: CerebrasImplConfig
+    def get_base_url(self) -> str:
+        return urljoin(self.config.base_url, "v1")
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()

llama_stack/providers/remote/inference/cerebras/config.py ADDED Viewed

@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import os
+from typing import Any
+from pydantic import Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+DEFAULT_BASE_URL = "https://api.cerebras.ai"
+@json_schema_type
+class CerebrasImplConfig(RemoteInferenceProviderConfig):
+    base_url: str = Field(
+        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
+        description="Base URL for the Cerebras API",
+    )
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        return {
+            "base_url": DEFAULT_BASE_URL,
+            "api_key": api_key,
+        }

llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py RENAMED Viewed

@@ -5,13 +5,12 @@
 # the root directory of this source tree.
 from .config import DatabricksImplConfig
-from .databricks import DatabricksInferenceAdapter
 async def get_adapter_impl(config: DatabricksImplConfig, _deps):
-    assert isinstance(
-        config, DatabricksImplConfig
-    ), f"Unexpected config type: {type(config)}"
-    impl = DatabricksInferenceAdapter(config)
+    from .databricks import DatabricksInferenceAdapter
+    assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
+    impl = DatabricksInferenceAdapter(config=config)
     await impl.initialize()
     return impl

llama_stack/providers/remote/inference/databricks/config.py ADDED Viewed

@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import Field, SecretStr
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+@json_schema_type
+class DatabricksImplConfig(RemoteInferenceProviderConfig):
+    url: str | None = Field(
+        default=None,
+        description="The URL for the Databricks model serving endpoint",
+    )
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        alias="api_token",
+        description="The Databricks API token",
+    )
+    @classmethod
+    def sample_run_config(
+        cls,
+        url: str = "${env.DATABRICKS_HOST:=}",
+        api_token: str = "${env.DATABRICKS_TOKEN:=}",
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "api_token": api_token,
+        }

llama_stack/providers/remote/inference/databricks/databricks.py ADDED Viewed

@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from collections.abc import Iterable
+from databricks.sdk import WorkspaceClient
+from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import DatabricksImplConfig
+logger = get_logger(name=__name__, category="inference::databricks")
+class DatabricksInferenceAdapter(OpenAIMixin):
+    config: DatabricksImplConfig
+    # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
+        "databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
+    }
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/serving-endpoints"
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        return [
+            endpoint.name
+            for endpoint in WorkspaceClient(
+                host=self.config.url, token=self.get_api_key()
+            ).serving_endpoints.list()  # TODO: this is not async
+        ]
+    async def openai_completion(
+        self,
+        params: OpenAICompletionRequestWithExtraBody,
+    ) -> OpenAICompletion:
+        raise NotImplementedError()

llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py RENAMED Viewed

@@ -4,15 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from pydantic import BaseModel
 from .config import FireworksImplConfig
+class FireworksProviderDataValidator(BaseModel):
+    fireworks_api_key: str
 async def get_adapter_impl(config: FireworksImplConfig, _deps):
     from .fireworks import FireworksInferenceAdapter
-    assert isinstance(
-        config, FireworksImplConfig
-    ), f"Unexpected config type: {type(config)}"
-    impl = FireworksInferenceAdapter(config)
+    assert isinstance(config, FireworksImplConfig), f"Unexpected config type: {type(config)}"
+    impl = FireworksInferenceAdapter(config=config)
     await impl.initialize()
     return impl

llama_stack/providers/remote/inference/fireworks/config.py ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+@json_schema_type
+class FireworksImplConfig(RemoteInferenceProviderConfig):
+    url: str = Field(
+        default="https://api.fireworks.ai/inference/v1",
+        description="The URL for the Fireworks server",
+    )
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        return {
+            "url": "https://api.fireworks.ai/inference/v1",
+            "api_key": api_key,
+        }

llama_stack/providers/remote/inference/fireworks/fireworks.py ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import FireworksImplConfig
+logger = get_logger(name=__name__, category="inference::fireworks")
+class FireworksInferenceAdapter(OpenAIMixin):
+    config: FireworksImplConfig
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
+        "accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
+    }
+    provider_data_api_key_field: str = "fireworks_api_key"
+    def get_base_url(self) -> str:
+        return "https://api.fireworks.ai/inference/v1"

llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py RENAMED Viewed

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from .config import PGVectorConfig
+from .config import GeminiConfig
-async def get_adapter_impl(config: PGVectorConfig, _deps):
-    from .pgvector import PGVectorMemoryAdapter
+async def get_adapter_impl(config: GeminiConfig, _deps):
+    from .gemini import GeminiInferenceAdapter
-    impl = PGVectorMemoryAdapter(config)
+    impl = GeminiInferenceAdapter(config=config)
     await impl.initialize()
     return impl

llama_stack/providers/remote/inference/gemini/config.py ADDED Viewed

@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import BaseModel, Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+class GeminiProviderDataValidator(BaseModel):
+    gemini_api_key: str | None = Field(
+        default=None,
+        description="API key for Gemini models",
+    )
+@json_schema_type
+class GeminiConfig(RemoteInferenceProviderConfig):
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        return {
+            "api_key": api_key,
+        }

llama_stack/providers/remote/inference/gemini/gemini.py ADDED Viewed

@@ -0,0 +1,82 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from openai import NOT_GIVEN
+from llama_stack.apis.inference import (
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from .config import GeminiConfig
+class GeminiInferenceAdapter(OpenAIMixin):
+    config: GeminiConfig
+    provider_data_api_key_field: str = "gemini_api_key"
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "models/text-embedding-004": {"embedding_dimension": 768, "context_length": 2048},
+        "models/gemini-embedding-001": {"embedding_dimension": 3072, "context_length": 2048},
+    }
+    def get_base_url(self):
+        return "https://generativelanguage.googleapis.com/v1beta/openai/"
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """
+        Override embeddings method to handle Gemini's missing usage statistics.
+        Gemini's embedding API doesn't return usage information, so we provide default values.
+        """
+        # Prepare request parameters
+        request_params = {
+            "model": await self._get_provider_model_id(params.model),
+            "input": params.input,
+            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
+            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
+            "user": params.user if params.user is not None else NOT_GIVEN,
+        }
+        # Add extra_body if present
+        extra_body = params.model_extra
+        if extra_body:
+            request_params["extra_body"] = extra_body
+        # Call OpenAI embeddings API with properly typed parameters
+        response = await self.client.embeddings.create(**request_params)
+        data = []
+        for i, embedding_data in enumerate(response.data):
+            data.append(
+                OpenAIEmbeddingData(
+                    embedding=embedding_data.embedding,
+                    index=i,
+                )
+            )
+        # Gemini doesn't return usage statistics - use default values
+        if hasattr(response, "usage") and response.usage:
+            usage = OpenAIEmbeddingUsage(
+                prompt_tokens=response.usage.prompt_tokens,
+                total_tokens=response.usage.total_tokens,
+            )
+        else:
+            usage = OpenAIEmbeddingUsage(
+                prompt_tokens=0,
+                total_tokens=0,
+            )
+        return OpenAIEmbeddingsResponse(
+            data=data,
+            model=params.model,
+            usage=usage,
+        )

llama_stack/providers/remote/inference/groq/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .config import GroqConfig
+async def get_adapter_impl(config: GroqConfig, _deps):
+    # import dynamically so the import is used only when it is needed
+    from .groq import GroqInferenceAdapter
+    adapter = GroqInferenceAdapter(config=config)
+    return adapter

llama_stack/providers/remote/inference/groq/config.py ADDED Viewed

@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import BaseModel, Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+class GroqProviderDataValidator(BaseModel):
+    groq_api_key: str | None = Field(
+        default=None,
+        description="API key for Groq models",
+    )
+@json_schema_type
+class GroqConfig(RemoteInferenceProviderConfig):
+    url: str = Field(
+        default="https://api.groq.com",
+        description="The URL for the Groq AI server",
+    )
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        return {
+            "url": "https://api.groq.com",
+            "api_key": api_key,
+        }

llama_stack/providers/remote/inference/groq/groq.py ADDED Viewed

@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.providers.remote.inference.groq.config import GroqConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+class GroqInferenceAdapter(OpenAIMixin):
+    config: GroqConfig
+    provider_data_api_key_field: str = "groq_api_key"
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/openai/v1"

llama_stack/providers/remote/inference/llama_openai_compat/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .config import LlamaCompatConfig
+async def get_adapter_impl(config: LlamaCompatConfig, _deps):
+    # import dynamically so the import is used only when it is needed
+    from .llama import LlamaCompatInferenceAdapter
+    adapter = LlamaCompatInferenceAdapter(config=config)
+    return adapter

llama_stack/providers/remote/inference/llama_openai_compat/config.py ADDED Viewed

@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any
+from pydantic import BaseModel, Field
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.schema_utils import json_schema_type
+class LlamaProviderDataValidator(BaseModel):
+    llama_api_key: str | None = Field(
+        default=None,
+        description="API key for api.llama models",
+    )
+@json_schema_type
+class LlamaCompatConfig(RemoteInferenceProviderConfig):
+    openai_compat_api_base: str = Field(
+        default="https://api.llama.com/compat/v1/",
+        description="The URL for the Llama API server",
+    )
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
+        return {
+            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
+            "api_key": api_key,
+        }

llama_stack/providers/remote/inference/llama_openai_compat/llama.py ADDED Viewed

@@ -0,0 +1,46 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.inference.inference import (
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+logger = get_logger(name=__name__, category="inference::llama_openai_compat")
+class LlamaCompatInferenceAdapter(OpenAIMixin):
+    config: LlamaCompatConfig
+    provider_data_api_key_field: str = "llama_api_key"
+    """
+    Llama API Inference Adapter for Llama Stack.
+    """
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+        :return: The Llama API base URL
+        """
+        return self.config.openai_compat_api_base
+    async def openai_completion(
+        self,
+        params: OpenAICompletionRequestWithExtraBody,
+    ) -> OpenAICompletion:
+        raise NotImplementedError()
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()

llama_stack/providers/remote/inference/nvidia/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.inference import Inference
+from .config import NVIDIAConfig
+async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:
+    # import dynamically so `llama stack list-deps` does not fail due to missing dependencies
+    from .nvidia import NVIDIAInferenceAdapter
+    if not isinstance(config, NVIDIAConfig):
+        raise RuntimeError(f"Unexpected config type: {type(config)}")
+    adapter = NVIDIAInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter
+__all__ = ["get_adapter_impl", "NVIDIAConfig"]

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl