PyPI - llama-stack-api - Versions diffs - 0.5.2__tar.gz → 0.6.0__tar.gz - Mend

llama-stack-api 0.5.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127) hide show

{llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-stack-api
-Version: 0.5.2
+Version: 0.6.0
 Summary: API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT

{llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/__init__.py RENAMED Viewed

@@ -60,6 +60,7 @@ from .agents import (
     ResponseGuardrail,
     ResponseGuardrailSpec,
     ResponseItemInclude,
+    ResponseTruncation,
     RetrieveResponseRequest,
 )
 from .batches import (
@@ -93,14 +94,22 @@ from .common.content_types import (
     _URLOrData,
 )
 from .common.errors import (
+    BatchNotFoundError,
     ConflictError,
     ConnectorNotFoundError,
     ConnectorToolNotFoundError,
+    ConversationItemNotFoundError,
+    ConversationNotFoundError,
     DatasetNotFoundError,
-    InvalidConversationIdError,
+    InternalServerError,
+    InvalidParameterError,
     ModelNotFoundError,
     ModelTypeError,
+    OpenAIFileObjectNotFoundError,
     ResourceNotFoundError,
+    ResponseInputItemNotFoundError,
+    ResponseNotFoundError,
+    ServiceNotEnabledError,
     TokenValidationError,
     ToolGroupNotFoundError,
     UnsupportedModelError,
@@ -206,7 +215,8 @@ from .eval import (
     resolve_job_status_request,
     resolve_run_eval_request,
 )
-from .file_processors import FileProcessors, ProcessFileResponse
+from .file_processors import FileProcessors, ProcessFileRequest, ProcessFileResponse
+from .filters import COMPARISON_FILTER_TYPES, COMPOUND_FILTER_TYPES, ComparisonFilter, CompoundFilter, Filter
 from .files import (
     DeleteFileRequest,
     ExpiresAfter,
@@ -256,6 +266,7 @@ from .inference import (
     OpenAIChatCompletionUsage,
     OpenAIChatCompletionUsageCompletionTokensDetails,
     OpenAIChatCompletionUsagePromptTokensDetails,
+    OpenAIChatCompletionResponseMessage,
     OpenAIChoice,
     OpenAIChoiceDelta,
     OpenAIChoiceLogprobs,
@@ -335,6 +346,7 @@ from .openai_responses import (
     OpenAIResponseContentPartReasoningText,
     OpenAIResponseContentPartRefusal,
     OpenAIResponseError,
+    OpenAIResponseIncompleteDetails,
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
@@ -535,15 +547,28 @@ from .tools import (
 from .validators import validate_embeddings_input_is_text
 from .vector_io import (
     Chunk,
+    ChunkForDeletion,
     ChunkMetadata,
+    DEFAULT_CHUNK_OVERLAP_TOKENS,
+    DEFAULT_CHUNK_SIZE_TOKENS,
+    DeleteChunksRequest,
     EmbeddedChunk,
+    InsertChunksRequest,
+    MAX_PAGINATION_LIMIT,
+    OpenAIAttachFileRequest,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
+    OpenAISearchVectorStoreRequest,
+    OpenAIUpdateVectorStoreFileRequest,
+    OpenAIUpdateVectorStoreRequest,
+    QueryChunksRequest,
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
+    VectorStoreChunkingStrategyContextual,
+    VectorStoreChunkingStrategyContextualConfig,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
     VectorStoreContent,
@@ -599,6 +624,7 @@ __all__ = [
     "ApprovalFilter",
     "BasicScoringFnParams",
     "Batches",
+    "BatchNotFoundError",
     "BatchObject",
     "CancelBatchRequest",
     "CancelTrainingJobRequest",
@@ -615,7 +641,11 @@ __all__ = [
     "ChatCompletionResponseEventType",
     "Checkpoint",
     "Chunk",
+    "ChunkForDeletion",
     "ChunkMetadata",
+    "DEFAULT_CHUNK_OVERLAP_TOKENS",
+    "DEFAULT_CHUNK_SIZE_TOKENS",
+    "DeleteChunksRequest",
     "EmbeddedChunk",
     "CommonBenchmarkFields",
     "ConflictError",
@@ -628,6 +658,8 @@ __all__ = [
     "Connector",
     "ConnectorNotFoundError",
     "ConnectorToolNotFoundError",
+    "ConversationItemNotFoundError",
+    "ConversationNotFoundError",
     "ConnectorInput",
     "Connectors",
     "ConnectorType",
@@ -694,6 +726,11 @@ __all__ = [
     "ExtraBodyField",
     "FileProcessors",
     "Files",
+    "Filter",
+    "ComparisonFilter",
+    "CompoundFilter",
+    "COMPARISON_FILTER_TYPES",
+    "COMPOUND_FILTER_TYPES",
     "Fp8QuantizationConfig",
     "clear_dynamic_schema_types",
     "get_schema_identifier",
@@ -707,13 +744,15 @@ __all__ = [
     "Inference",
     "InferenceProvider",
     "InlineProviderSpec",
+    "InsertChunksRequest",
     "Inspect",
     "InspectProviderRequest",
+    "InternalServerError",
     "Admin",
     "Int4QuantizationConfig",
     "InterleavedContent",
     "InterleavedContentItem",
-    "InvalidConversationIdError",
+    "InvalidParameterError",
     "is_generic_list",
     "is_type_optional",
     "is_type_union",
@@ -763,6 +802,7 @@ __all__ = [
     "ListToolsResponse",
     "LogProbConfig",
     "LoraFinetuningConfig",
+    "MAX_PAGINATION_LIMIT",
     "MCPListToolsTool",
     "Metadata",
     "Model",
@@ -801,6 +841,7 @@ __all__ = [
     "OpenAIChatCompletionToolChoiceFunctionTool",
     "OpenAIChatCompletionToolChoiceCustomTool",
     "OpenAIChatCompletionToolChoice",
+    "OpenAIChatCompletionResponseMessage",
     "OpenAIChoice",
     "OpenAIChoiceDelta",
     "OpenAIChoiceLogprobs",
@@ -822,6 +863,7 @@ __all__ = [
     "OpenAIFileDeleteResponse",
     "OpenAIFileFile",
     "OpenAIFileObject",
+    "OpenAIFileObjectNotFoundError",
     "OpenAIFilePurpose",
     "OpenAIFinishReason",
     "OpenAIImageURL",
@@ -830,6 +872,10 @@ __all__ = [
     "OpenAIMessageParam",
     "OpenAIModel",
     "Order",
+    "OpenAIAttachFileRequest",
+    "OpenAISearchVectorStoreRequest",
+    "OpenAIUpdateVectorStoreFileRequest",
+    "OpenAIUpdateVectorStoreRequest",
     "OpenAIResponseAnnotationCitation",
     "OpenAIResponseAnnotationContainerFileCitation",
     "OpenAIResponseAnnotationFileCitation",
@@ -841,6 +887,7 @@ __all__ = [
     "OpenAIResponseContentPartReasoningText",
     "OpenAIResponseContentPartRefusal",
     "OpenAIResponseError",
+    "OpenAIResponseIncompleteDetails",
     "OpenAIResponseFormatJSONObject",
     "OpenAIResponseFormatJSONSchema",
     "OpenAIResponseFormatParam",
@@ -936,6 +983,7 @@ __all__ = [
     "ParamType",
     "parse_type",
     "PostTraining",
+    "ProcessFileRequest",
     "ProcessFileResponse",
     "PostTrainingMetric",
     "PostTrainingJob",
@@ -961,6 +1009,7 @@ __all__ = [
     "QATFinetuningConfig",
     "QuantizationConfig",
     "QuantizationType",
+    "QueryChunksRequest",
     "QueryChunksResponse",
     "RAGDocument",
     "RAGQueryConfig",
@@ -980,12 +1029,16 @@ __all__ = [
     "RerankResponse",
     "Resource",
     "ResourceNotFoundError",
+    "ResponseInputItemNotFoundError",
+    "ResponseNotFoundError",
     "ResourceType",
     "ResponseFormat",
     "ResponseFormatType",
     "ResponseGuardrail",
     "ResponseGuardrailSpec",
     "ResponseItemInclude",
+    "ResponseTruncation",
+    "ResponseNotFoundError",
     "RetrieveFileContentRequest",
     "RetrieveFileRequest",
     "RouteInfo",
@@ -1020,6 +1073,7 @@ __all__ = [
     "SchemaInfo",
     "SchemaOptions",
     "SearchRankingOptions",
+    "ServiceNotEnabledError",
     "Shield",
     "ShieldInput",
     "ShieldStore",
@@ -1065,6 +1119,8 @@ __all__ = [
     "VectorStore",
     "VectorStoreChunkingStrategy",
     "VectorStoreChunkingStrategyAuto",
+    "VectorStoreChunkingStrategyContextual",
+    "VectorStoreChunkingStrategyContextualConfig",
     "VectorStoreChunkingStrategyStatic",
     "VectorStoreChunkingStrategyStaticConfig",
     "VectorStoreContent",
@@ -1097,4 +1153,6 @@ __all__ = [
     "WeightedRanker",
     # Validators
     "validate_embeddings_input_is_text",
+    # helpers
+    "remove_null_from_anyof",
 ]

{llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/__init__.py RENAMED Viewed

@@ -21,6 +21,7 @@ from .models import (
     ResponseGuardrail,
     ResponseGuardrailSpec,
     ResponseItemInclude,
+    ResponseTruncation,
     RetrieveResponseRequest,
 )
@@ -33,6 +34,7 @@ __all__ = [
     "ResponseGuardrail",
     "ResponseGuardrailSpec",
     "ResponseItemInclude",
+    "ResponseTruncation",
     "RetrieveResponseRequest",
     "fastapi_routes",
 ]

{llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/fastapi_routes.py RENAMED Viewed

@@ -17,10 +17,11 @@ import logging  # allow-direct-logging
 from collections.abc import AsyncIterator
 from typing import Annotated, Any
-from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+from fastapi import APIRouter, Body, Depends, Path, Query
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
+from llama_stack_api.common.errors import OpenAIErrorResponse
 from llama_stack_api.common.responses import Order
 from llama_stack_api.openai_responses import (
     ListOpenAIResponseInputItem,
@@ -29,9 +30,11 @@ from llama_stack_api.openai_responses import (
     OpenAIResponseObject,
 )
 from llama_stack_api.router_utils import (
+    ExceptionTranslatingRoute,
     create_path_dependency,
     create_query_dependency,
     standard_responses,
+    try_translate_to_http_exception,
 )
 from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -72,8 +75,10 @@ async def sse_generator(event_gen):
         raise  # Re-raise to maintain proper cancellation semantics
     except Exception as e:
         logger.exception("Error in SSE generator")
-        exc = _http_exception_from_sse_error(e)
-        yield create_sse_event({"error": {"status_code": exc.status_code, "message": exc.detail}})
+        http_exc = try_translate_to_http_exception(e)
+        status_code = http_exc.status_code if http_exc else 500
+        detail = http_exc.detail if http_exc else "Internal server error: An unexpected error occurred."
+        yield create_sse_event(OpenAIErrorResponse.from_message(detail, code=str(status_code)).to_dict())
 # Automatically generate dependency functions from Pydantic models
@@ -115,29 +120,6 @@ async def get_list_response_input_items_request(
     )
-def _http_exception_from_value_error(exc: ValueError) -> HTTPException:
-    """Convert implementation `ValueError` into an OpenAI-compatible HTTP error.
-    The compatibility OpenAI client maps HTTP 400 -> `BadRequestError`.
-    The existing API surface (and integration tests) expect "not found" cases
-    to be represented as a 400, not a 404.
-    """
-    detail = str(exc) or "Invalid value"
-    return HTTPException(status_code=400, detail=detail)
-def _http_exception_from_sse_error(exc: Exception) -> HTTPException:
-    if isinstance(exc, HTTPException):
-        return exc
-    if isinstance(exc, ValueError):
-        return _http_exception_from_value_error(exc)
-    status_code = getattr(exc, "status_code", None)
-    if isinstance(status_code, int):
-        return HTTPException(status_code=status_code, detail=str(exc))
-    return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
 def _preserve_context_for_sse(event_gen):
     # StreamingResponse runs in a different task, losing request contextvars.
     # create_task inside context.run captures the context at task creation.
@@ -173,6 +155,7 @@ def create_router(impl: Agents) -> APIRouter:
         prefix=f"/{LLAMA_STACK_API_V1}",
         tags=["Agents"],
         responses=standard_responses,
+        route_class=ExceptionTranslatingRoute,
     )
     @router.get(
@@ -184,10 +167,7 @@ def create_router(impl: Agents) -> APIRouter:
     async def get_openai_response(
         request: Annotated[RetrieveResponseRequest, Depends(get_retrieve_response_request)],
     ) -> OpenAIResponseObject:
-        try:
-            return await impl.get_openai_response(request)
-        except ValueError as exc:
-            raise _http_exception_from_value_error(exc) from exc
+        return await impl.get_openai_response(request)
     @router.post(
         "/responses",
@@ -208,10 +188,7 @@ def create_router(impl: Agents) -> APIRouter:
     async def create_openai_response(
         request: Annotated[CreateResponseRequest, Body(...)],
     ) -> OpenAIResponseObject | StreamingResponse:
-        try:
-            result = await impl.create_openai_response(request)
-        except ValueError as exc:
-            raise _http_exception_from_value_error(exc) from exc
+        result = await impl.create_openai_response(request)
         # For streaming responses, wrap in StreamingResponse for HTTP requests.
         # The implementation is typed to return an `AsyncIterator` for streaming.
@@ -232,10 +209,7 @@ def create_router(impl: Agents) -> APIRouter:
     async def list_openai_responses(
         request: Annotated[ListResponsesRequest, Depends(get_list_responses_request)],
     ) -> ListOpenAIResponseObject:
-        try:
-            return await impl.list_openai_responses(request)
-        except ValueError as exc:
-            raise _http_exception_from_value_error(exc) from exc
+        return await impl.list_openai_responses(request)
     @router.get(
         "/responses/{response_id}/input_items",
@@ -246,10 +220,7 @@ def create_router(impl: Agents) -> APIRouter:
     async def list_openai_response_input_items(
         request: Annotated[ListResponseInputItemsRequest, Depends(get_list_response_input_items_request)],
     ) -> ListOpenAIResponseInputItem:
-        try:
-            return await impl.list_openai_response_input_items(request)
-        except ValueError as exc:
-            raise _http_exception_from_value_error(exc) from exc
+        return await impl.list_openai_response_input_items(request)
     @router.delete(
         "/responses/{response_id}",
@@ -260,9 +231,6 @@ def create_router(impl: Agents) -> APIRouter:
     async def delete_openai_response(
         request: Annotated[DeleteResponseRequest, Depends(get_delete_response_request)],
     ) -> OpenAIDeleteResponseObject:
-        try:
-            return await impl.delete_openai_response(request)
-        except ValueError as exc:
-            raise _http_exception_from_value_error(exc) from exc
+        return await impl.delete_openai_response(request)
     return router

{llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/models.py RENAMED Viewed

@@ -15,6 +15,7 @@ from enum import StrEnum
 from pydantic import BaseModel, ConfigDict, Field
 from llama_stack_api.common.responses import Order
+from llama_stack_api.inference import ServiceTier
 from llama_stack_api.openai_responses import (
     OpenAIResponseInput,
     OpenAIResponseInputTool,
@@ -23,6 +24,7 @@ from llama_stack_api.openai_responses import (
     OpenAIResponseReasoning,
     OpenAIResponseText,
 )
+from llama_stack_api.schema_utils import remove_null_from_anyof
 class ResponseItemInclude(StrEnum):
@@ -37,6 +39,13 @@ class ResponseItemInclude(StrEnum):
     reasoning_encrypted_content = "reasoning.encrypted_content"
+class ResponseTruncation(StrEnum):
+    """Controls how the service truncates input when it exceeds the model context window."""
+    auto = "auto"  # Let the service decide how to truncate
+    disabled = "disabled"  # Disable truncation; context over limit results in 400 error
 class ResponseGuardrailSpec(BaseModel):
     """Specification for a guardrail to apply during response generation."""
@@ -49,13 +58,19 @@ class ResponseGuardrailSpec(BaseModel):
 ResponseGuardrail = str | ResponseGuardrailSpec
+# extra_body can be accessed via .model_extra
 class CreateResponseRequest(BaseModel):
     """Request model for creating a response."""
-    model_config = ConfigDict(extra="forbid")
+    model_config = ConfigDict(extra="allow")
     input: str | list[OpenAIResponseInput] = Field(..., description="Input message(s) to create the response.")
     model: str = Field(..., description="The underlying LLM used for completions.")
+    background: bool | None = Field(
+        default=None,
+        description="Whether to run the model response in the background. When true, returns immediately with status 'queued'.",
+        json_schema_extra=remove_null_from_anyof,
+    )
     prompt: OpenAIResponsePrompt | None = Field(
         default=None, description="Prompt object with ID, version, and variables."
     )
@@ -68,6 +83,11 @@ class CreateResponseRequest(BaseModel):
         default=None,
         description="Optional ID of a previous response to continue from.",
     )
+    prompt_cache_key: str | None = Field(
+        default=None,
+        max_length=64,
+        description="A key to use when reading from or writing to the prompt cache.",
+    )
     conversation: str | None = Field(
         default=None,
         description="Optional ID of a conversation to add the response to.",
@@ -86,6 +106,18 @@ class CreateResponseRequest(BaseModel):
         le=2.0,
         description="Sampling temperature.",
     )
+    top_p: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="Nucleus sampling parameter that controls response diversity (lower values increase focus).",
+    )
+    frequency_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description="Penalizes new tokens based on their frequency in the text so far.",
+    )
     text: OpenAIResponseText | None = Field(
         default=None,
         description="Configuration for text response generation.",
@@ -130,10 +162,30 @@ class CreateResponseRequest(BaseModel):
         max_length=64,
         description="A stable identifier used for safety monitoring and abuse detection.",
     )
+    service_tier: ServiceTier | None = Field(
+        default=None,
+        description="The service tier to use for this request.",
+    )
     metadata: dict[str, str] | None = Field(
         default=None,
         description="Dictionary of metadata key-value pairs to attach to the response.",
     )
+    truncation: ResponseTruncation | None = Field(
+        default=None,
+        description="Controls how the service truncates input when it exceeds the model context window.",
+    )
+    top_logprobs: int | None = Field(
+        default=None,
+        ge=0,
+        le=20,
+        description="The number of most likely tokens to return at each position, along with their log probabilities.",
+    )
+    presence_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description="Penalizes new tokens based on whether they appear in the text so far.",
+    )
 class RetrieveResponseRequest(BaseModel):

llama-stack-api 0.5.2__tar.gz → 0.6.0__tar.gz

llama-stack-api 0.5.2__tar.gz → 0.6.0__tar.gz