llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
--- a/llama_stack/providers/utils/responses/responses_store.py
+++ b/llama_stack/providers/utils/responses/responses_store.py
@@ -4,25 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.
-
-
-from llama_stack.
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
-from
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="openai_responses")
 
@@ -252,19 +249,12 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]
 
-
-
-
-
-
-
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )
 
         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
 
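The hunk above replaces an insert-then-catch-and-update fallback with a single atomic `upsert`. A minimal sketch of the insert-or-update contract that call shape implies, using a hypothetical in-memory stand-in rather than the real SQLAlchemy-backed store:

```python
# Hedged illustration only: the argument names come from the diff above, but
# this toy store is not the real AuthorizedSqlStore implementation.
from typing import Any


class InMemoryStore:
    """Toy store illustrating upsert keyed on conflict_columns."""

    def __init__(self) -> None:
        self.tables: dict[str, list[dict[str, Any]]] = {}

    async def upsert(
        self,
        table: str,
        data: dict[str, Any],
        conflict_columns: list[str],
        update_columns: list[str],
    ) -> None:
        rows = self.tables.setdefault(table, [])
        key = tuple(data[c] for c in conflict_columns)
        for row in rows:
            if tuple(row[c] for c in conflict_columns) == key:
                # Conflict: rewrite only update_columns, like ON CONFLICT DO UPDATE
                for c in update_columns:
                    row[c] = data[c]
                return
        rows.append(dict(data))  # no conflict: plain insert
```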
--- a/llama_stack/providers/utils/scoring/aggregation_utils.py
+++ b/llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from
-from llama_stack.apis.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
--- a/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
--- a/llama_stack/providers/utils/tools/mcp.py
+++ b/llama_stack/providers/utils/tools/mcp.py
@@ -15,18 +15,55 @@ from mcp import types as mcp_types
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 
-from llama_stack.
-from llama_stack.
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
+from llama_stack_api import (
+    ImageContentItem,
+    InterleavedContentItem,
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
 
+
+def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]:
+    """
+    Prepare headers for MCP requests with authorization support.
+
+    Args:
+        base_headers: Base headers dictionary (can be None)
+        authorization: OAuth access token (without "Bearer " prefix)
+
+    Returns:
+        Headers dictionary with Authorization header if token provided
+
+    Raises:
+        ValueError: If Authorization header is specified in the headers dict (security risk)
+    """
+    headers = dict(base_headers or {})
+
+    # Security check: reject any Authorization header in the headers dict
+    # Users must use the authorization parameter instead to avoid security risks
+    existing_keys_lower = {k.lower() for k in headers.keys()}
+    if "authorization" in existing_keys_lower:
+        raise ValueError(
+            "For security reasons, Authorization header cannot be passed via 'headers'. "
+            "Please use the 'authorization' parameter instead."
+        )
+
+    # Add Authorization header if token provided
+    if authorization:
+        # OAuth access token - add "Bearer " prefix
+        headers["Authorization"] = f"Bearer {authorization}"
+
+    return headers
+
+
 protocol_cache = TTLDict(ttl_seconds=3600)
 
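For reference, the guarantees of the new `prepare_mcp_headers` helper shown verbatim above; the header and token values in this snippet are placeholders:

```python
# Illustrative use of prepare_mcp_headers as defined in the hunk above.
headers = prepare_mcp_headers({"X-Trace-Id": "abc123"}, authorization="tok_secret")
assert headers == {"X-Trace-Id": "abc123", "Authorization": "Bearer tok_secret"}

# Smuggling Authorization through the headers dict is rejected (case-insensitive):
try:
    prepare_mcp_headers({"authorization": "Bearer tok_secret"}, authorization=None)
except ValueError:
    pass  # callers are directed to the dedicated 'authorization' parameter
```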
@@ -49,7 +86,10 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
     try:
         client = streamablehttp_client
         if strategy == MCPProtol.SSE:
-
+            # sse_client and streamablehttp_client have different signatures, but both
+            # are called the same way here, so we cast to Any to avoid type errors
+            client = cast(Any, sse_client)
+
         async with client(endpoint, headers=headers) as client_streams:
             async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                 await session.initialize()
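The `cast(Any, ...)` line above is the usual way to reassign a variable to a callable whose type differs from the first assignment. A generic, self-contained illustration with made-up names (not the MCP client types themselves):

```python
from typing import Any, cast


def open_two_streams(url: str) -> tuple[str, str]:
    return (url, "read")


def open_three_streams(url: str) -> tuple[str, str, str]:
    return (url, "read", "write")


client = open_two_streams
# client = open_three_streams   # a type checker rejects this: return types differ
client = cast(Any, open_three_streams)  # type erased; the call site stays the same
streams = client("https://example.com/mcp")
```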
@@ -107,9 +147,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
             raise
 
 
-async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
+async def list_mcp_tools(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> ListToolDefsResponse:
+    """List tools available from an MCP server.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        List of tool definitions from the MCP server
+
+    Raises:
+        ValueError: If Authorization is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
     tools = []
-    async with client_wrapper(endpoint,
+    async with client_wrapper(endpoint, final_headers) as session:
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -127,9 +187,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
 
 
 async def invoke_mcp_tool(
-    endpoint: str,
+    endpoint: str,
+    tool_name: str,
+    kwargs: dict[str, Any],
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
 ) -> ToolInvocationResult:
-
+    """Invoke an MCP tool with the given arguments.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        tool_name: Name of the tool to invoke
+        kwargs: Tool invocation arguments
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        Tool invocation result with content and error information
+
+    Raises:
+        ValueError: If Authorization header is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
 
         content: list[InterleavedContentItem] = []
@@ -137,7 +219,7 @@ async def invoke_mcp_tool(
             if isinstance(item, mcp_types.TextContent):
                 content.append(TextContentItem(text=item.text))
             elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=item.data))
+                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
             elif isinstance(item, mcp_types.EmbeddedResource):
                 logger.warning(f"EmbeddedResource is not supported: {item}")
             else:
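Putting the two updated entry points together, a hypothetical call site; the endpoint, token, and tool arguments are placeholders, and the `tools.data[0].name` access assumes the `ListToolDefsResponse`/`ToolDef` field names from the imports above:

```python
# Hedged example: values and the target server are made up.
import asyncio


async def main() -> None:
    tools = await list_mcp_tools(
        endpoint="http://localhost:8000/mcp",
        headers={"X-Client": "example"},  # must NOT contain Authorization
        authorization="tok_secret",       # bare token; "Bearer " is added internally
    )
    result = await invoke_mcp_tool(
        endpoint="http://localhost:8000/mcp",
        tool_name=tools.data[0].name,
        kwargs={"query": "hello"},
        authorization="tok_secret",
    )
    print(result.content)


asyncio.run(main())
```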
--- a/llama_stack/providers/utils/vector_io/__init__.py
+++ b/llama_stack/providers/utils/vector_io/__init__.py
@@ -3,3 +3,19 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]
--- a/llama_stack/providers/utils/vector_io/vector_utils.py
+++ b/llama_stack/providers/utils/vector_io/vector_utils.py
@@ -7,6 +7,9 @@
 import hashlib
 import re
 import uuid
+from typing import Any
+
+from llama_stack_api import EmbeddedChunk
 
 
 def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
@@ -154,3 +157,36 @@ class WeightedInMemoryAggregator:
         # Default to RRF for None, RRF, or any unknown types
         impact_factor = reranker_params.get("impact_factor", 60.0)
         return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
+
+
+def load_embedded_chunk_with_backward_compat(
+    chunk_data: dict[str, Any],
+) -> EmbeddedChunk:
+    """
+    Load EmbeddedChunk data with backward compatibility for legacy field locations.
+
+    Handles migration from old format where embedding_model and embedding_dimension
+    were stored in chunk_metadata to current top-level format.
+
+    Args:
+        chunk_data: Dictionary containing chunk data to load
+
+    Returns:
+        EmbeddedChunk object with migrated data
+    """
+    # Migrate old data: extract embedding_model/embedding_dimension from chunk_metadata if missing
+    if "embedding_model" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_model"] = chunk_metadata.get("chunk_embedding_model", "unknown")
+
+    if "embedding_dimension" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_dimension"] = chunk_metadata.get(
+            "chunk_embedding_dimension", len(chunk_data.get("embedding", []))
+        )
+
+    # Ensure embedding field exists (required by EmbeddedChunk)
+    if "embedding" not in chunk_data:
+        chunk_data["embedding"] = []
+
+    return EmbeddedChunk(**chunk_data)
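A sketch of the migration behavior, assuming a legacy chunk payload of this shape (`EmbeddedChunk` may require additional fields not shown here):

```python
# Illustrative payload; field values are made up.
legacy_chunk = {
    "content": "hello world",
    "embedding": [0.1, 0.2, 0.3],
    "chunk_metadata": {
        "chunk_embedding_model": "all-MiniLM-L6-v2",
        "chunk_embedding_dimension": 3,
    },
}
chunk = load_embedded_chunk_with_backward_compat(legacy_chunk)
# The legacy metadata is lifted to the new top-level fields:
# chunk.embedding_model == "all-MiniLM-L6-v2"
# chunk.embedding_dimension == 3
```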
--- /dev/null
+++ b/llama_stack/telemetry/constants.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to llama stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
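Expanded, the f-string constants above resolve to the following attribute names (derived directly from the definitions):

```python
assert SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE == "llama_stack.safety.request.shield_id"
assert SAFETY_REQUEST_MESSAGES_ATTRIBUTE == "llama_stack.safety.request.messages"
assert SAFETY_RESPONSE_METADATA_ATTRIBUTE == "llama_stack.safety.response.metadata"
assert SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE == "llama_stack.safety.response.violation.level"
assert SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE == "llama_stack.safety.response.violation.user_message"
```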
--- /dev/null
+++ b/llama_stack/telemetry/helpers.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+    RUN_SHIELD_OPERATION_NAME,
+    SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+    SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+    SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+    return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using Wrapt to automatically instrument code
+# This is the industry standard way to package automatically instrumentation in python.
+def safety_request_span_attributes(
+    shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+    span = trace.get_current_span()
+    span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+    messages_json = json.dumps([msg.model_dump() for msg in messages])
+    span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+    if response.violation:
+        if response.violation.metadata:
+            metadata_json = json.dumps(response.violation.metadata)
+            span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+        if response.violation.user_message:
+            span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+        span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
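A hypothetical emission path for these helpers: open a span named by `safety_span_name`, run the shield, then attach the request/response attributes. The tracer setup and the `safety_api` object are placeholders, not part of the diff:

```python
from opentelemetry import trace

tracer = trace.get_tracer("llama_stack.safety")


async def run_shield_with_telemetry(safety_api, shield_id, messages):
    # Span name becomes e.g. "run_shield llama-guard"
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await safety_api.run_shield(shield_id=shield_id, messages=messages)
        safety_request_span_attributes(shield_id, messages, response)
        return response
```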
--- a/llama_stack/testing/api_recorder.py
+++ b/llama_stack/testing/api_recorder.py
@@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice
 from llama_stack.core.testing_context import get_test_context, is_debug_mode
 
 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
-CompletionChoice.model_fields["finish_reason"].annotation =
+CompletionChoice.model_fields["finish_reason"].annotation = cast(
+    type[Any] | None, Literal["stop", "length", "content_filter"] | None
+)
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
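The `finish_reason` patch above relies on the pydantic v2 pattern of mutating `model_fields[...].annotation` and rebuilding the model. A generic sketch with a made-up model (note `force=True`, which may be needed when the schema was already built):

```python
from typing import Literal

from pydantic import BaseModel


class Choice(BaseModel):
    finish_reason: Literal["stop", "length"]


# Widen the field's annotation, then rebuild so validation picks it up.
Choice.model_fields["finish_reason"].annotation = Literal["stop", "length"] | None  # type: ignore[assignment]
Choice.model_rebuild(force=True)

print(Choice(finish_reason=None))  # now accepted, mirroring the CompletionChoice fix
```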
@@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }
 
     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id
 
     normalized_json = json.dumps(normalized, sort_keys=True)
@@ -428,7 +430,7 @@ class ResponseStorage:
 
         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             digest = _model_identifiers_digest(endpoint, response)
             response_file = f"models-{request_hash}-{digest}.json"
@@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))
@@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m
@@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     if endpoint == "/api/tags":
         from ollama import ListResponse
 
-
+        # Both cast(Any, ...) and type: ignore are needed here:
+        # - cast(Any, ...) attempts to bypass type checking on the argument
+        # - type: ignore is still needed because mypy checks the call site independently
+        #   and reports arg-type mismatch even after casting
+        body = ListResponse(models=cast(Any, ordered))  # type: ignore[arg-type]
         return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
 
 
 async def _patched_tool_invoke_method(
-    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any]
+    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
 ):
     """Patched version of tool runtime invoke_tool method for recording/replay."""
     global _current_mode, _current_storage
 
     if _current_mode == APIRecordingMode.LIVE or _current_storage is None:
         # Normal operation
-        return await original_method(self, tool_name, kwargs)
+        return await original_method(self, tool_name, kwargs, authorization=authorization)
 
     request_hash = normalize_tool_request(provider_name, tool_name, kwargs)
 
@@ -627,7 +634,7 @@ async def _patched_tool_invoke_method(
 
     if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
         # Make the tool call and record it
-        result = await original_method(self, tool_name, kwargs)
+        result = await original_method(self, tool_name, kwargs, authorization=authorization)
 
         request_data = {
             "test_id": get_test_context(),
@@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         logger.info(f" Test context: {get_test_context()}")
 
     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)
@@ -693,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
            records = storage._model_list_responses(request_hash)
            recording = _combine_model_list_responses(endpoint, records)
         else:
@@ -733,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     )
 
     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)
 
         # we want to store the result of the iterator, not the iterator itself
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]
 
         request_data = {
@@ -878,9 +885,11 @@ def patch_inference_clients():
     OllamaAsyncClient.list = patched_ollama_list
 
     # Create patched methods for tool runtimes
-    async def patched_tavily_invoke_tool(
+    async def patched_tavily_invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ):
         return await _patched_tool_invoke_method(
-            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs
+            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization
         )
 
     # Apply tool runtime patches