llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/inference/oci/oci.py (new file):

```diff
@@ -0,0 +1,162 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from collections.abc import Iterable
+from typing import Any
+
+import httpx
+import oci
+from oci.generative_ai.generative_ai_client import GenerativeAiClient
+from oci.generative_ai.models import ModelCollection
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
+from llama_stack.providers.remote.inference.oci.config import OCIConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import Model, ModelType
+
+logger = get_logger(name=__name__, category="inference::oci")
+
+OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
+OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
+VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
+DEFAULT_OCI_REGION = "us-ashburn-1"
+
+MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
+
+
+class OCIInferenceAdapter(OpenAIMixin):
+    config: OCIConfig
+
+    embedding_models: list[str] = []
+
+    async def initialize(self) -> None:
+        """Initialize and validate OCI configuration."""
+        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
+            raise ValueError(
+                f"Invalid OCI authentication type: {self.config.oci_auth_type}."
+                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
+            )
+
+        if not self.config.oci_compartment_id:
+            raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.")
+
+    def get_base_url(self) -> str:
+        region = self.config.oci_region or DEFAULT_OCI_REGION
+        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"
+
+    def get_api_key(self) -> str | None:
+        # OCI doesn't use API keys, it uses request signing
+        return "<NOTUSED>"
+
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.
+        """
+        auth = self._get_auth()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        return {
+            "http_client": DefaultAsyncHttpxClient(
+                auth=auth,
+                headers={
+                    "CompartmentId": compartment_id,
+                },
+            ),
+        }
+
+    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+        return None
+
+    def _get_oci_config(self) -> dict:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            config = {"region": self.config.oci_region}
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
+            if not config.get("region"):
+                raise ValueError(
+                    "Region not specified in config. Please specify in config or with OCI_REGION env variable."
+                )
+
+        return config
+
+    def _get_auth(self) -> httpx.Auth:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return OciInstancePrincipalAuth()
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            return OciUserPrincipalAuth(
+                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
+            )
+        else:
+            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        List available models from OCI Generative AI service.
+        """
+        oci_config = self._get_oci_config()
+        oci_signer = self._get_oci_signer()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        if oci_signer is None:
+            client = GenerativeAiClient(config=oci_config)
+        else:
+            client = GenerativeAiClient(config=oci_config, signer=oci_signer)
+
+        models: ModelCollection = client.list_models(
+            compartment_id=compartment_id,
+            # capability=MODEL_CAPABILITIES,
+            lifecycle_state="ACTIVE",
+        ).data
+
+        seen_models = set()
+        model_ids = []
+        for model in models.items:
+            if model.time_deprecated or model.time_on_demand_retired:
+                continue
+
+            if "UNKNOWN_ENUM_VALUE" in model.capabilities or "FINE_TUNE" in model.capabilities:
+                continue
+
+            # Use display_name + model_type as the key to avoid conflicts
+            model_key = (model.display_name, ModelType.llm)
+            if model_key in seen_models:
+                continue
+
+            seen_models.add(model_key)
+            model_ids.append(model.display_name)
+
+            if "TEXT_EMBEDDINGS" in model.capabilities:
+                self.embedding_models.append(model.display_name)
+
+        return model_ids
+
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if identifier in self.embedding_models:
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
```
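The new OCI adapter authenticates with request signing rather than API keys, so `get_api_key()` returns a placeholder and the real credentials ride on the httpx auth hooks. A minimal usage sketch follows; `OCIConfig` lives in the accompanying new `config.py` (+75 lines, not shown here), so its constructor is assumed to mirror the fields the adapter reads above.

```python
# Hypothetical usage sketch, not adapter code from the diff. OCIConfig's exact
# constructor is defined in the new config.py and is assumed to accept the
# fields referenced by the adapter above.
import asyncio

from llama_stack.providers.remote.inference.oci.config import OCIConfig
from llama_stack.providers.remote.inference.oci.oci import OCIInferenceAdapter


async def main() -> None:
    config = OCIConfig(
        oci_auth_type="config_file",            # or "instance_principal"
        oci_config_file_path="~/.oci/config",   # standard OCI CLI config path
        oci_config_profile="DEFAULT",
        oci_region="us-ashburn-1",              # falls back to DEFAULT_OCI_REGION
        oci_compartment_id="ocid1.compartment.oc1..example",  # placeholder OCID
    )
    adapter = OCIInferenceAdapter(config=config)
    await adapter.initialize()                  # validates auth type + compartment
    print(list(await adapter.list_provider_model_ids()))


asyncio.run(main())
```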
llama_stack/providers/remote/inference/ollama/config.py:

```diff
@@ -6,20 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
-DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
 
 
 class OllamaImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)
 
-
+    base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
 
     @classmethod
-    def sample_run_config(
+    def sample_run_config(
+        cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "
+            "base_url": base_url,
         }
```
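The Ollama default URL now carries the `/v1` suffix and the field becomes a validated `HttpUrl` named `base_url`. A quick sketch of what the new defaults yield, assuming `RemoteInferenceProviderConfig` has no other required fields:

```python
# Sketch based on the diff above: the default base_url now includes /v1,
# and sample_run_config emits the renamed "base_url" key.
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig

cfg = OllamaImplConfig()
print(str(cfg.base_url))                   # -> http://localhost:11434/v1
print(OllamaImplConfig.sample_run_config())
# -> {'base_url': '${env.OLLAMA_URL:=http://localhost:11434/v1}'}
```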
llama_stack/providers/remote/inference/ollama/ollama.py:

```diff
@@ -9,15 +9,15 @@ import asyncio
 
 from ollama import AsyncClient as AsyncOllamaClient
 
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
 from llama_stack.log import get_logger
-from llama_stack.providers.
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
-from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::ollama")
 
@@ -28,6 +28,9 @@ class OllamaInferenceAdapter(OpenAIMixin):
     # automatically set by the resolver when instantiating the provider
     __provider_id__: str
 
+    # Ollama does not support the stream_options parameter
+    supports_stream_options: bool = False
+
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "all-minilm:l6-v2": {
             "embedding_dimension": 384,
@@ -55,17 +58,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
         # ollama client attaches itself to the current event loop (sadly?)
         loop = asyncio.get_running_loop()
         if loop not in self._clients:
-
+            # Ollama client expects base URL without /v1 suffix
+            base_url_str = str(self.config.base_url)
+            if base_url_str.endswith("/v1"):
+                host = base_url_str[:-3]
+            else:
+                host = base_url_str
+            self._clients[loop] = AsyncOllamaClient(host=host)
         return self._clients[loop]
 
     def get_api_key(self):
         return "NO KEY REQUIRED"
 
     def get_base_url(self):
-        return self.config.
+        return str(self.config.base_url)
 
     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.
+        logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
        r = await self.health()
         if r["status"] == HealthStatus.ERROR:
             logger.warning(
```
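The native `ollama` client connects to the bare host, while the OpenAI-compatible API lives under `/v1`; that is why the adapter strips the suffix before constructing `AsyncOllamaClient`. The same logic, isolated as a standalone restatement:

```python
# Mirrors the suffix-stripping added above (a restatement for illustration,
# not adapter code): the native Ollama client wants the host without /v1.
def ollama_host_from_base_url(base_url: str) -> str:
    return base_url[:-3] if base_url.endswith("/v1") else base_url


assert ollama_host_from_base_url("http://localhost:11434/v1") == "http://localhost:11434"
assert ollama_host_from_base_url("http://localhost:11434") == "http://localhost:11434"
```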
llama_stack/providers/remote/inference/openai/config.py:

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class OpenAIProviderDataValidator(BaseModel):
@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
 
 @json_schema_type
 class OpenAIConfig(RemoteInferenceProviderConfig):
-    base_url:
-        default="https://api.openai.com/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.openai.com/v1"),
         description="Base URL for OpenAI API",
     )
 
```
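Typing `base_url` as `HttpUrl` moves URL validation to config-load time. A sketch, assuming `RemoteInferenceProviderConfig` imposes no other required fields:

```python
# Sketch: malformed URLs are now rejected when the config is constructed,
# instead of surfacing later as a client connection error.
from pydantic import ValidationError

from llama_stack.providers.remote.inference.openai.config import OpenAIConfig

print(str(OpenAIConfig().base_url))        # -> https://api.openai.com/v1
try:
    OpenAIConfig(base_url="not a url")
except ValidationError as err:
    print(f"rejected: {err.error_count()} validation error(s)")
```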
llama_stack/providers/remote/inference/passthrough/__init__.py:

```diff
@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
 
 
 class PassthroughProviderDataValidator(BaseModel):
-
-
+    passthrough_url: str
+    passthrough_api_key: str
 
 
 async def get_adapter_impl(config: PassthroughImplConfig, _deps):
```
llama_stack/providers/remote/inference/passthrough/config.py:

```diff
@@ -6,29 +6,24 @@
 
 from typing import Any
 
-from pydantic import Field,
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class PassthroughImplConfig(RemoteInferenceProviderConfig):
-
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the passthrough endpoint",
     )
 
-    api_key: SecretStr | None = Field(
-        default=None,
-        description="API Key for the passthrouth endpoint",
-    )
-
     @classmethod
     def sample_run_config(
-        cls,
+        cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
     ) -> dict[str, Any]:
         return {
-            "
+            "base_url": base_url,
             "api_key": api_key,
         }
```
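With the explicit `api_key` field removed, passthrough credentials come either from config (`auth_credential` / `base_url`) or per request. A hedged sketch of the per-request path, using the `X-LlamaStack-Provider-Data` header named in the adapter's error messages; the route and model id here are placeholders, not taken from the diff:

```python
# Illustrative request; the header keys match PassthroughProviderDataValidator
# above, but the endpoint path and model id are assumptions for the example.
import json

import httpx

headers = {
    "X-LlamaStack-Provider-Data": json.dumps(
        {
            "passthrough_url": "https://downstream.example.com",
            "passthrough_api_key": "sk-example",
        }
    )
}
resp = httpx.post(
    "http://localhost:8321/v1/chat/completions",  # illustrative Llama Stack route
    headers=headers,
    json={"model": "passthrough/example-model", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.status_code)
```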
llama_stack/providers/remote/inference/passthrough/passthrough.py:

```diff
@@ -5,12 +5,14 @@
 # the root directory of this source tree.
 
 from collections.abc import AsyncIterator
-from typing import Any
 
-from
+from openai import AsyncOpenAI
 
-from llama_stack.
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -19,104 +21,124 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .config import PassthroughImplConfig
 
 
-class PassthroughInferenceAdapter(Inference):
+class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
     def __init__(self, config: PassthroughImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
         self.config = config
 
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
     async def unregister_model(self, model_id: str) -> None:
         pass
 
     async def register_model(self, model: Model) -> Model:
         return model
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    async def list_models(self) -> list[Model]:
+        """List models by calling the downstream /v1/models endpoint."""
+        client = self._get_openai_client()
+
+        response = await client.models.list()
+
+        # Convert from OpenAI format to Llama Stack Model format
+        models = []
+        for model_data in response.data:
+            downstream_model_id = model_data.id
+            custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
+
+            # Prefix identifier with provider ID for local registry
+            local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
+
+            model = Model(
+                identifier=local_identifier,
+                provider_id=self.__provider_id__,
+                provider_resource_id=downstream_model_id,
+                model_type=custom_metadata.get("model_type", "llm"),
+                metadata=custom_metadata,
+            )
+            models.append(model)
+
+        return models
+
+    async def should_refresh_models(self) -> bool:
+        """Passthrough should refresh models since they come from downstream dynamically."""
+        return self.config.refresh_models
+
+    def _get_openai_client(self) -> AsyncOpenAI:
+        """Get an AsyncOpenAI client configured for the downstream server."""
+        base_url = self._get_passthrough_url()
+        api_key = self._get_passthrough_api_key()
+
+        return AsyncOpenAI(
+            base_url=f"{base_url.rstrip('/')}/v1",
+            api_key=api_key,
         )
 
-
-
-
-
-
+    def _get_passthrough_url(self) -> str:
+        """Get the passthrough URL from config or provider data."""
+        if self.config.base_url is not None:
+            return str(self.config.base_url)
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+            )
+        return provider_data.passthrough_url
+
+    def _get_passthrough_api_key(self) -> str:
+        """Get the passthrough API key from config or provider data."""
+        if self.config.auth_credential is not None:
+            return self.config.auth_credential.get_secret_value()
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+            )
+        return provider_data.passthrough_api_key
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
-
-
-
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
-
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+        """Forward completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
+        response = await client.completions.create(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(response)
 
-        return
+        return response  # type: ignore[return-value]
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-
-
+        """Forward chat completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.chat.completions.create(**request_params)
 
-
-
+        if params.stream:
+            return wrap_async_stream(response)
 
-
+        return response  # type: ignore[return-value]
 
-
-
-
-
-
-
-
-
-
-        json_input = [x for x in json_input if x is not None]
-        new_input = []
-        for x in json_input:
-            if isinstance(x, dict):
-                x = {k: v for k, v in x.items() if v is not None}
-            new_input.append(x)
-        json_input = new_input
-
-        json_params[key] = json_input
-
-        return json_params
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """Forward embeddings request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.embeddings.create(**request_params)
+        return response  # type: ignore
```
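The rewritten adapter is now a thin `AsyncOpenAI` proxy: it normalizes the downstream base URL once and forwards requests verbatim. The URL normalization, restated standalone:

```python
# Restates the base-url handling from _get_openai_client above: strip any
# trailing slash, then append the OpenAI-compatible /v1 prefix.
def downstream_openai_base_url(passthrough_url: str) -> str:
    return f"{passthrough_url.rstrip('/')}/v1"


assert downstream_openai_base_url("https://host.example.com/") == "https://host.example.com/v1"
assert downstream_openai_base_url("https://host.example.com") == "https://host.example.com/v1"
```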
llama_stack/providers/remote/inference/runpod/config.py:

```diff
@@ -6,15 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
+
+
+class RunpodProviderDataValidator(BaseModel):
+    runpod_api_token: str | None = Field(
+        default=None,
+        description="API token for RunPod models",
+    )
 
 
 @json_schema_type
 class RunpodImplConfig(RemoteInferenceProviderConfig):
-
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the Runpod model serving endpoint",
     )
@@ -27,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "
-            "api_token": "${env.RUNPOD_API_TOKEN}",
+            "base_url": "${env.RUNPOD_URL:=}",
+            "api_token": "${env.RUNPOD_API_TOKEN:=}",
         }
```
llama_stack/providers/remote/inference/runpod/runpod.py:

```diff
@@ -4,13 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections.abc import AsyncIterator
-
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import RunpodImplConfig
@@ -24,19 +17,8 @@ class RunpodInferenceAdapter(OpenAIMixin):
     """
 
     config: RunpodImplConfig
+    provider_data_api_key_field: str = "runpod_api_token"
 
     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.
-
-    async def openai_chat_completion(
-        self,
-        params: OpenAIChatCompletionRequestWithExtraBody,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        """Override to add RunPod-specific stream_options requirement."""
-        params = params.model_copy()
-
-        if params.stream and not params.stream_options:
-            params.stream_options = {"include_usage": True}
-
-        return await super().openai_chat_completion(params)
+        return str(self.config.base_url)
```
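RunPod's bespoke `openai_chat_completion` override is deleted, so the mixin's generic handling takes over, and `provider_data_api_key_field` lets the token arrive per request. A hedged sketch of that per-request path; the header mechanism is assumed to match the provider-data pattern used elsewhere in the stack, not shown in this diff:

```python
# Illustrative: with provider_data_api_key_field = "runpod_api_token", the
# mixin is expected to read the token from per-request provider data when
# the run config carries no credential. Header shape assumed for the example.
import json

headers = {"X-LlamaStack-Provider-Data": json.dumps({"runpod_api_token": "rp-example-token"})}
```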
llama_stack/providers/remote/inference/sambanova/config.py:

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):
 
 @json_schema_type
 class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-
-        default="https://api.sambanova.ai/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
         description="The URL for the SambaNova AI server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "
+            "base_url": "https://api.sambanova.ai/v1",
             "api_key": api_key,
         }
```
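The SambaNova change completes the pattern repeated across these providers in 0.4.0: the old URL-style field becomes a typed `base_url`, and run configs rename the key accordingly. Roughly, with the old key name assumed since the removed lines are truncated in this view:

```python
# Before/after shape of the provider entry in a run config; the old key name
# is an assumption, as the 0.3.5 lines are truncated in this diff view.
old_style = {"url": "https://api.sambanova.ai/v1", "api_key": "${env.SAMBANOVA_API_KEY:=}"}
new_style = {"base_url": "https://api.sambanova.ai/v1", "api_key": "${env.SAMBANOVA_API_KEY:=}"}
```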