llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/utils/responses/responses_store.py
@@ -4,25 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.agents import (
-    Order,
-)
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="openai_responses")
 
@@ -252,19 +249,12 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]
 
-        # Upsert: try insert first, update if exists
-        try:
-            await self.sql_store.insert(
-                table="conversation_messages",
-                data={"conversation_id": conversation_id, "messages": messages_data},
-            )
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )
 
         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
 
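The hunk above replaces a racy insert-then-update fallback with a single atomic `upsert`. As a rough sketch of what a `conflict_columns`/`update_columns` upsert can compile down to, here is a minimal SQLAlchemy example using dialect-level ON CONFLICT support (illustrative only; the actual implementation in `sqlalchemy_sqlstore.py` may differ):

```python
# A rough sketch of an atomic upsert with SQLAlchemy's dialect-level ON CONFLICT
# support (SQLite shown; sqlalchemy.dialects.postgresql.insert is analogous).
# Illustrative only -- not necessarily how sqlalchemy_sqlstore.py implements upsert().
from sqlalchemy import Column, MetaData, String, Table, Text, create_engine
from sqlalchemy.dialects.sqlite import insert

metadata = MetaData()
conversation_messages = Table(
    "conversation_messages",
    metadata,
    Column("conversation_id", String, primary_key=True),
    Column("messages", Text),
)


def upsert(engine, data: dict, conflict_columns: list[str], update_columns: list[str]) -> None:
    stmt = insert(conversation_messages).values(**data)
    stmt = stmt.on_conflict_do_update(
        index_elements=conflict_columns,
        # "excluded" names the row that would have been inserted, so only the
        # requested columns are overwritten when the key already exists.
        set_={col: stmt.excluded[col] for col in update_columns},
    )
    with engine.begin() as conn:
        conn.execute(stmt)


engine = create_engine("sqlite:///:memory:")
metadata.create_all(engine)
upsert(engine, {"conversation_id": "c1", "messages": "[]"}, ["conversation_id"], ["messages"])
```

Pushing the conflict resolution into the database removes the window between the failed insert and the follow-up update that the old try/except version left open.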
llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
llama_stack/providers/utils/tools/mcp.py
@@ -15,18 +15,55 @@ from mcp import types as mcp_types
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 
-from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem
-from llama_stack.apis.tools import (
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
+from llama_stack_api import (
+    ImageContentItem,
+    InterleavedContentItem,
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
 
+
+def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]:
+    """
+    Prepare headers for MCP requests with authorization support.
+
+    Args:
+        base_headers: Base headers dictionary (can be None)
+        authorization: OAuth access token (without "Bearer " prefix)
+
+    Returns:
+        Headers dictionary with Authorization header if token provided
+
+    Raises:
+        ValueError: If Authorization header is specified in the headers dict (security risk)
+    """
+    headers = dict(base_headers or {})
+
+    # Security check: reject any Authorization header in the headers dict
+    # Users must use the authorization parameter instead to avoid security risks
+    existing_keys_lower = {k.lower() for k in headers.keys()}
+    if "authorization" in existing_keys_lower:
+        raise ValueError(
+            "For security reasons, Authorization header cannot be passed via 'headers'. "
+            "Please use the 'authorization' parameter instead."
+        )
+
+    # Add Authorization header if token provided
+    if authorization:
+        # OAuth access token - add "Bearer " prefix
+        headers["Authorization"] = f"Bearer {authorization}"
+
+    return headers
+
+
 protocol_cache = TTLDict(ttl_seconds=3600)
 
 
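The helper's behavior follows directly from the code above; a quick illustrative check (header and token values are made up):

```python
# Derived directly from prepare_mcp_headers above; values are made up.
headers = prepare_mcp_headers({"X-Trace-Id": "abc"}, authorization="tok_123")
assert headers == {"X-Trace-Id": "abc", "Authorization": "Bearer tok_123"}

# Authorization supplied via the headers dict is rejected, case-insensitively:
try:
    prepare_mcp_headers({"authorization": "Bearer tok_123"}, authorization=None)
except ValueError as exc:
    print(exc)  # points the caller at the 'authorization' parameter instead
```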
@@ -49,7 +86,10 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
     try:
         client = streamablehttp_client
         if strategy == MCPProtol.SSE:
-            client = sse_client
+            # sse_client and streamablehttp_client have different signatures, but both
+            # are called the same way here, so we cast to Any to avoid type errors
+            client = cast(Any, sse_client)
+
         async with client(endpoint, headers=headers) as client_streams:
             async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                 await session.initialize()
@@ -107,9 +147,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
             raise
 
 
-async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
+async def list_mcp_tools(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> ListToolDefsResponse:
+    """List tools available from an MCP server.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        List of tool definitions from the MCP server
+
+    Raises:
+        ValueError: If Authorization is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
     tools = []
-    async with client_wrapper(endpoint, headers) as session:
+    async with client_wrapper(endpoint, final_headers) as session:
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -127,9 +187,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
 
 
 async def invoke_mcp_tool(
-    endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any]
+    endpoint: str,
+    tool_name: str,
+    kwargs: dict[str, Any],
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
 ) -> ToolInvocationResult:
-    async with client_wrapper(endpoint, headers) as session:
+    """Invoke an MCP tool with the given arguments.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        tool_name: Name of the tool to invoke
+        kwargs: Tool invocation arguments
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        Tool invocation result with content and error information
+
+    Raises:
+        ValueError: If Authorization header is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
        result = await session.call_tool(tool_name, kwargs)
 
         content: list[InterleavedContentItem] = []
@@ -137,7 +219,7 @@ async def invoke_mcp_tool(
             if isinstance(item, mcp_types.TextContent):
                 content.append(TextContentItem(text=item.text))
             elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=item.data))
+                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
             elif isinstance(item, mcp_types.EmbeddedResource):
                 logger.warning(f"EmbeddedResource is not supported: {item}")
             else:
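With the reworked signatures, callers pass a bare OAuth token via the new `authorization` parameter instead of assembling the header themselves. A hypothetical end-to-end call (endpoint, token, and tool arguments are placeholders):

```python
import asyncio

from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools


async def main() -> None:
    endpoint = "https://mcp.example.com/mcp"  # placeholder MCP server
    tools = await list_mcp_tools(endpoint, authorization="tok_123")
    # ListToolDefsResponse carries the tool definitions in its .data field
    result = await invoke_mcp_tool(
        endpoint,
        tool_name=tools.data[0].name,
        kwargs={"query": "weather in SF"},
        authorization="tok_123",  # sent as "Authorization: Bearer tok_123"
    )
    print(result.content)


asyncio.run(main())
```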
llama_stack/telemetry/constants.py (new file)
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to llama stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
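For reference, the f-strings above resolve to stable dotted attribute names:

```python
from llama_stack.telemetry.constants import (
    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
)

assert SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE == "llama_stack.safety.request.shield_id"
assert SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE == "llama_stack.safety.response.violation.level"
```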
llama_stack/telemetry/helpers.py (new file)
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+    RUN_SHIELD_OPERATION_NAME,
+    SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+    SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+    SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+    return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using Wrapt to automatically instrument code
+# This is the industry standard way to package automatically instrumentation in python.
+def safety_request_span_attributes(
+    shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+    span = trace.get_current_span()
+    span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+    messages_json = json.dumps([msg.model_dump() for msg in messages])
+    span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+    if response.violation:
+        if response.violation.metadata:
+            metadata_json = json.dumps(response.violation.metadata)
+            span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+        if response.violation.user_message:
+            span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+        span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
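A sketch of how a safety provider could pair these helpers with an OpenTelemetry tracer; `safety_impl` and its `run_shield` call stand in for the real provider wiring, which this diff does not show:

```python
from opentelemetry import trace

from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
from llama_stack_api import OpenAIMessageParam, RunShieldResponse

tracer = trace.get_tracer(__name__)


async def run_shield_traced(
    safety_impl, shield_id: str, messages: list[OpenAIMessageParam]
) -> RunShieldResponse:
    # Hypothetical wrapper: open a span named "run_shield <shield_id>", invoke the
    # provider (safety_impl is a placeholder), then attach the request/response
    # attributes defined in constants.py to the current span.
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await safety_impl.run_shield(shield_id, messages)
        safety_request_span_attributes(shield_id, messages, response)
        return response
```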
llama_stack/testing/api_recorder.py
@@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice
 from llama_stack.core.testing_context import get_test_context, is_debug_mode
 
 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
-CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
+CompletionChoice.model_fields["finish_reason"].annotation = cast(
+    type[Any] | None, Literal["stop", "length", "content_filter"] | None
+)
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
 
@@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }
 
     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id
 
     normalized_json = json.dumps(normalized, sort_keys=True)
@@ -428,7 +430,7 @@ class ResponseStorage:
 
         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
            digest = _model_identifiers_digest(endpoint, response)
            response_file = f"models-{request_hash}-{digest}.json"
 
@@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))
@@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m
@@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     if endpoint == "/api/tags":
         from ollama import ListResponse
 
-        body = ListResponse(models=ordered)
+        # Both cast(Any, ...) and type: ignore are needed here:
+        # - cast(Any, ...) attempts to bypass type checking on the argument
+        # - type: ignore is still needed because mypy checks the call site independently
+        #   and reports arg-type mismatch even after casting
+        body = ListResponse(models=cast(Any, ordered))  # type: ignore[arg-type]
     return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
 
 
 async def _patched_tool_invoke_method(
-    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any]
+    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
 ):
     """Patched version of tool runtime invoke_tool method for recording/replay."""
     global _current_mode, _current_storage
 
     if _current_mode == APIRecordingMode.LIVE or _current_storage is None:
         # Normal operation
-        return await original_method(self, tool_name, kwargs)
+        return await original_method(self, tool_name, kwargs, authorization=authorization)
 
     request_hash = normalize_tool_request(provider_name, tool_name, kwargs)
 
@@ -627,7 +634,7 @@
 
     if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
         # Make the tool call and record it
-        result = await original_method(self, tool_name, kwargs)
+        result = await original_method(self, tool_name, kwargs, authorization=authorization)
 
         request_data = {
             "test_id": get_test_context(),
@@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     logger.info(f" Test context: {get_test_context()}")
 
     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)
@@ -693,7 +700,7 @@
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             records = storage._model_list_responses(request_hash)
             recording = _combine_model_list_responses(endpoint, records)
         else:
@@ -733,13 +740,13 @@
         )
 
     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)
 
         # we want to store the result of the iterator, not the iterator itself
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]
 
         request_data = {
@@ -878,9 +885,11 @@ def patch_inference_clients():
     OllamaAsyncClient.list = patched_ollama_list
 
     # Create patched methods for tool runtimes
-    async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]):
+    async def patched_tavily_invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ):
         return await _patched_tool_invoke_method(
-            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs
+            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization
        )
 
     # Apply tool runtime patches
{llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.5
+Version: 0.4.0
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -16,20 +16,20 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: PyYAML>=6.0
 Requires-Dist: aiohttp
 Requires-Dist: fastapi<1.0,>=0.115.0
 Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client==0.3.5
-Requires-Dist: openai>=1.107
+Requires-Dist: llama-stack-api
+Requires-Dist: openai>=2.5.0
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
 Requires-Dist: pyjwt[crypto]>=2.10.0
 Requires-Dist: pydantic>=2.11.9
 Requires-Dist: rich
-Requires-Dist: starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: pillow
@@ -42,11 +42,11 @@ Requires-Dist: aiosqlite>=0.21.0
 Requires-Dist: asyncpg
 Requires-Dist: sqlalchemy[asyncio]>=2.0.41
 Requires-Dist: starlette>=0.49.1
-Provides-Extra: ui
-Requires-Dist: streamlit; extra == "ui"
-Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
-Requires-Dist: streamlit-option-menu; extra == "ui"
+Requires-Dist: psycopg2-binary
+Requires-Dist: tornado>=6.5.3
+Requires-Dist: urllib3>=2.6.0
+Provides-Extra: client
+Requires-Dist: llama-stack-client==0.4.0; extra == "client"
 Dynamic: license-file
 
 # Llama Stack
@@ -71,9 +71,9 @@ curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh
 
 ### Overview
 
-Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
+Llama Stack defines and standardizes the core building blocks that simplify AI application development. It provides a unified set of APIs with implementations from leading service providers. More specifically, it provides:
 
-- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
+- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals.
 - **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
 - **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
 - **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
@@ -88,76 +88,81 @@ Llama Stack standardizes the core building blocks that simplify AI application d
88
88
  />
89
89
  </div>
90
90
 
91
- ### Llama Stack Benefits
92
- - **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
91
+ #### Llama Stack Benefits
92
+
93
+ - **Flexibility**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
93
94
  - **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
94
- - **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
95
+ - **Robust Ecosystem**: Llama Stack is integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
95
96
 
96
- By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.
97
+ For more information, see the [Benefits of Llama Stack](https://llamastack.github.io/docs/latest/concepts/architecture#benefits-of-llama-stack) documentation.
97
98
 
98
99
  ### API Providers
99
100
  Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
100
101
  Please checkout for [full list](https://llamastack.github.io/docs/providers)
101
102
 
102
- | API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
- |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
- | Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
- | SambaNova | Hosted | | ✅ | | ✅ | | | | |
- | Cerebras | Hosted | | ✅ | | | | | | |
- | Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
- | AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
- | Together | Hosted | ✅ | ✅ | | ✅ | | | | |
- | Groq | Hosted | | ✅ | | | | | | |
- | Ollama | Single Node | | ✅ | | | | | | |
- | TGI | Hosted/Single Node | | ✅ | | | | | | |
- | NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
- | ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
- | Milvus | Hosted/Single Node | | | ✅ | | | | | |
- | Qdrant | Hosted/Single Node | | | ✅ | | | | | |
- | Weaviate | Hosted/Single Node | | | ✅ | | | | | |
- | SQLite-vec | Single Node | | | ✅ | | | | | |
- | PG Vector | Single Node | | | ✅ | | | | | |
- | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
- | vLLM | Single Node | | ✅ | | | | | | |
- | OpenAI | Hosted | | ✅ | | | | | | |
- | Anthropic | Hosted | | ✅ | | | | | | |
- | Gemini | Hosted | | ✅ | | | | | | |
- | WatsonX | Hosted | | ✅ | | | | | | |
- | HuggingFace | Single Node | | | | | | ✅ | | ✅ |
- | TorchTune | Single Node | | | | | | ✅ | | |
- | NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
- | NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
+ | API Provider | Environments | Agents | Inference | VectorIO | Safety | Post Training | Eval | DatasetIO |
+ |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:-------------:|:----:|:--------:|
+ | Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+ | SambaNova | Hosted | | ✅ | | ✅ | | | |
+ | Cerebras | Hosted | | ✅ | | | | | |
+ | Fireworks | Hosted | ✅ | ✅ | ✅ | | | | |
+ | AWS Bedrock | Hosted | | ✅ | | ✅ | | | |
+ | Together | Hosted | ✅ | ✅ | | ✅ | | | |
+ | Groq | Hosted | | ✅ | | | | | |
+ | Ollama | Single Node | | ✅ | | | | | |
+ | TGI | Hosted/Single Node | | ✅ | | | | | |
+ | NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | |
+ | ChromaDB | Hosted/Single Node | | | ✅ | | | | |
+ | Milvus | Hosted/Single Node | | | ✅ | | | | |
+ | Qdrant | Hosted/Single Node | | | ✅ | | | | |
+ | Weaviate | Hosted/Single Node | | | ✅ | | | | |
+ | SQLite-vec | Single Node | | | ✅ | | | | |
+ | PG Vector | Single Node | | | ✅ | | | | |
+ | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | |
+ | vLLM | Single Node | | ✅ | | | | | |
+ | OpenAI | Hosted | | ✅ | | | | | |
+ | Anthropic | Hosted | | ✅ | | | | | |
+ | Gemini | Hosted | | ✅ | | | | | |
+ | WatsonX | Hosted | | ✅ | | | | | |
+ | HuggingFace | Single Node | | | | | ✅ | | ✅ |
+ | TorchTune | Single Node | | | | | ✅ | | |
+ | NVIDIA NEMO | Hosted | | ✅ | ✅ | | ✅ | ✅ | ✅ |
+ | NVIDIA | Hosted | | | | | ✅ | ✅ | ✅ |

  > **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

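The matrix above is static documentation; a running stack can also report its own configuration. A minimal sketch, assuming a server on the default port 8321 and a `llama-stack-client` version that exposes the providers listing (field names follow the `ProviderInfo` schema and may vary across versions):

```python
# Minimal sketch: ask a running Llama Stack server which API providers
# it is configured with. Assumes the server listens on the default
# port 8321; the ProviderInfo field names may differ across versions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for provider in client.providers.list():
    print(f"{provider.api}: {provider.provider_id} ({provider.provider_type})")
```
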
  ### Distributions

- A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code.
+ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario. For example, you can begin with a local setup of Ollama and seamlessly transition to production with Fireworks without changing your application code, as sketched below.
  Here are some of the distributions we support:

  | **Distribution** | **Llama Stack Docker** | Start This Distribution |
  |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
- | Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/starter.html) |
- | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
+ | Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/docs/distributions/self_hosted_distro/starter) |
+ | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/docs/distributions/self_hosted_distro/meta-reference-gpu) |
  | PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |

+ For full documentation on the Llama Stack distributions, see the [Distributions Overview](https://llamastack.github.io/docs/distributions) page.
+
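To make that portability concrete, here is a minimal sketch: the application code is identical across distributions, and only the server URL and model id change between environments. It assumes the OpenAI-compatible chat completions endpoint exposed by the stack; the URL and model id below are illustrative placeholders.

```python
# Minimal sketch: the same client code runs against a local
# Ollama-backed distro or a hosted one; only the server URL and
# model id change. Both defaults below are illustrative placeholders,
# and the OpenAI-compatible chat completions endpoint is assumed.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_URL", "http://localhost:8321"),
)

response = client.chat.completions.create(
    model=os.environ.get("LLAMA_STACK_MODEL", "ollama/llama3.2:3b"),
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```
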
  ### Documentation

- Please checkout our [Documentation](https://llamastack.github.io/latest/index.html) page for more details.
+ Please check out our [Documentation](https://llamastack.github.io/docs) page for more details.

  * CLI references
- * [llama (server-side) CLI Reference](https://llamastack.github.io/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
- * [llama (client-side) CLI Reference](https://llamastack.github.io/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
+ * [llama (server-side) CLI Reference](https://llamastack.github.io/docs/references/llama_cli_reference): Guide for using the `llama` CLI to work with Llama models (download, study prompts) and to build and start a Llama Stack distribution.
+ * [llama (client-side) CLI Reference](https://llamastack.github.io/docs/references/llama_stack_client_cli_reference): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
  * Getting Started
- * [Quick guide to start a Llama Stack server](https://llamastack.github.io/latest/getting_started/index.html).
+ * [Quick guide to start a Llama Stack server](https://llamastack.github.io/docs/getting_started/quickstart).
  * [Jupyter notebook](./docs/getting_started.ipynb) to walk through how to use the llama_stack_client APIs for simple text and vision inference.
  * The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) from the [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
  * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guides you through all the key components of Llama Stack with code samples.
  * [Contributing](CONTRIBUTING.md)
- * [Adding a new API Provider](https://llamastack.github.io/latest/contributing/new_api_provider.html) to walk-through how to add a new API provider.
+ * [Adding a new API Provider](https://llamastack.github.io/docs/contributing/new_api_provider): a walkthrough of how to add a new API provider.

  ### Llama Stack Client SDKs

+ Check out our client SDKs for connecting to a Llama Stack server in your preferred language; a minimal Python example follows the table below.
+
  | **Language** | **Client SDK** | **Package** |
  | :----: | :----: | :----: |
  | Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
@@ -165,11 +170,8 @@ Please checkout our [Documentation](https://llamastack.github.io/latest/index.ht
  | TypeScript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
  | Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [![Maven version](https://img.shields.io/maven-central/v/com.llama.llamastack/llama-stack-client-kotlin)](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)
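
For example, with the Python SDK (a minimal sketch, assuming a server running locally on the default port 8321):

```python
# Minimal sketch: connect to a locally running Llama Stack server
# with the Python SDK and list the models its distribution has
# registered. Assumes the server listens on the default port 8321.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for model in client.models.list():
    print(model.identifier)
```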
 
- Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
-
  You can find more example scripts that use the client SDKs to talk to a Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

-
  ## 🌟 GitHub Star History