PyPI - llama-stack - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (458) hide show

{llama_stack/apis/vector_io → llama_stack_api}/vector_io.py RENAMED Viewed

@@ -8,19 +8,15 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import uuid
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable
-from fastapi import Body
-from pydantic import BaseModel, Field
+from fastapi import Body, Query
+from pydantic import BaseModel, Field, field_validator
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
-from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
-from llama_stack.schema_utils import json_schema_type, webmethod
-from llama_stack.strong_typing.schema import register_schema
+from llama_stack_api.inference import InterleavedContent
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api.version import LLAMA_STACK_API_V1
 @json_schema_type
@@ -37,8 +33,6 @@ class ChunkMetadata(BaseModel):
     :param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
     :param chunk_window: The window of the chunk, which can be used to group related chunks together.
     :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
-    :param chunk_embedding_model: The embedding model used to create the chunk's embedding.
-    :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
     :param content_token_count: The number of tokens in the content of the chunk.
     :param metadata_token_count: The number of tokens in the metadata of the chunk.
     """
@@ -50,8 +44,6 @@ class ChunkMetadata(BaseModel):
     updated_timestamp: int | None = None
     chunk_window: str | None = None
     chunk_tokenizer: str | None = None
-    chunk_embedding_model: str | None = None
-    chunk_embedding_dimension: int | None = None
     content_token_count: int | None = None
     metadata_token_count: int | None = None
@@ -59,39 +51,18 @@ class ChunkMetadata(BaseModel):
 @json_schema_type
 class Chunk(BaseModel):
     """
-    A chunk of content that can be inserted into a vector database.
+    A chunk of content from file processing.
     :param content: The content of the chunk, which can be interleaved text, images, or other types.
-    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
+    :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
     :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
-    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
     :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
         The `chunk_metadata` is required backend functionality.
     """
     content: InterleavedContent
+    chunk_id: str
     metadata: dict[str, Any] = Field(default_factory=dict)
-    embedding: list[float] | None = None
-    # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
-    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
-    chunk_metadata: ChunkMetadata | None = None
-    model_config = {"populate_by_name": True}
-    def model_post_init(self, __context):
-        # Extract chunk_id from metadata if present
-        if self.metadata and "chunk_id" in self.metadata:
-            self.stored_chunk_id = self.metadata.pop("chunk_id")
-    @property
-    def chunk_id(self) -> str:
-        """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
-        if self.stored_chunk_id:
-            return self.stored_chunk_id
-        if "document_id" in self.metadata:
-            return generate_chunk_id(self.metadata["document_id"], str(self.content))
-        return generate_chunk_id(str(uuid.uuid4()), str(self.content))
+    chunk_metadata: ChunkMetadata
     @property
     def document_id(self) -> str | None:
@@ -110,15 +81,30 @@ class Chunk(BaseModel):
         return None
+@json_schema_type
+class EmbeddedChunk(Chunk):
+    """
+    A chunk of content with its embedding vector for vector database operations.
+    Inherits all fields from Chunk and adds embedding-related fields.
+    :param embedding: The embedding vector for the chunk content.
+    :param embedding_model: The model used to generate the embedding (e.g., 'openai/text-embedding-3-small').
+    :param embedding_dimension: The dimension of the embedding vector.
+    """
+    embedding: list[float]
+    embedding_model: str
+    embedding_dimension: int
 @json_schema_type
 class QueryChunksResponse(BaseModel):
     """Response from querying chunks in a vector database.
-    :param chunks: List of content chunks returned from the query
+    :param chunks: List of embedded chunks returned from the query
     :param scores: Relevance scores corresponding to each returned chunk
     """
-    chunks: list[Chunk]
+    chunks: list[EmbeddedChunk]
     scores: list[float]
@@ -245,10 +231,16 @@ class VectorStoreContent(BaseModel):
     :param type: Content type, currently only "text" is supported
     :param text: The actual text content
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
     """
     type: Literal["text"]
     text: str
+    embedding: list[float] | None = None
+    chunk_metadata: ChunkMetadata | None = None
+    metadata: dict[str, Any] | None = None
 @json_schema_type
@@ -281,7 +273,7 @@ class VectorStoreSearchResponsePage(BaseModel):
     """
     object: str = "vector_store.search_results.page"
-    search_query: str
+    search_query: list[str]
     data: list[VectorStoreSearchResponse]
     has_more: bool = False
     next_page: str | None = None
@@ -301,6 +293,22 @@ class VectorStoreDeleteResponse(BaseModel):
     deleted: bool = True
+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
 @json_schema_type
 class VectorStoreChunkingStrategyAuto(BaseModel):
     """Automatic chunking strategy for vector store files.
@@ -372,6 +380,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
 register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
+# VectorStoreFileAttributes type with OpenAPI constraints
+VectorStoreFileAttributes = Annotated[
+    dict[str, Annotated[str, Field(max_length=512)] | float | bool],
+    Field(
+        max_length=16,
+        json_schema_extra={
+            "propertyNames": {"type": "string", "maxLength": 64},
+            "x-oaiTypeLabel": "map",
+        },
+        description=(
+            "Set of 16 key-value pairs that can be attached to an object. This can be "
+            "useful for storing additional information about the object in a structured "
+            "format, and querying for objects via API or the dashboard. Keys are strings "
+            "with a maximum length of 64 characters. Values are strings with a maximum "
+            "length of 512 characters, booleans, or numbers."
+        ),
+    ),
+]
+def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
+    """
+    Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
+    Converts dict[str, Any] to dict[str, str | float | bool]:
+    - Preserves: str (truncated to 512 chars), bool, int/float (as float)
+    - Converts: list -> comma-separated string
+    - Filters: dict, None, other types
+    - Enforces: max 16 properties, max 64 char keys, max 512 char string values
+    """
+    if not metadata:
+        return {}
+    sanitized: dict[str, str | float | bool] = {}
+    for key, value in metadata.items():
+        # Enforce max 16 properties
+        if len(sanitized) >= 16:
+            break
+        # Enforce max 64 char keys
+        if len(key) > 64:
+            continue
+        # Convert to supported primitive types
+        if isinstance(value, bool):
+            sanitized[key] = value
+        elif isinstance(value, int | float):
+            sanitized[key] = float(value)
+        elif isinstance(value, str):
+            # Enforce max 512 char string values
+            sanitized[key] = value[:512] if len(value) > 512 else value
+        elif isinstance(value, list):
+            # Convert lists to comma-separated strings (max 512 chars)
+            list_str = ", ".join(str(item) for item in value)
+            sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
+    return sanitized
 @json_schema_type
 class VectorStoreFileObject(BaseModel):
     """OpenAI Vector Store File object.
@@ -389,7 +456,7 @@ class VectorStoreFileObject(BaseModel):
     id: str
     object: str = "vector_store.file"
-    attributes: dict[str, Any] = Field(default_factory=dict)
+    attributes: VectorStoreFileAttributes = Field(default_factory=dict)
     chunking_strategy: VectorStoreChunkingStrategy
     created_at: int
     last_error: VectorStoreFileLastError | None = None
@@ -397,6 +464,12 @@ class VectorStoreFileObject(BaseModel):
     usage_bytes: int = 0
     vector_store_id: str
+    @field_validator("attributes", mode="before")
+    @classmethod
+    def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
+        """Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
+        return _sanitize_vector_store_attributes(v)
 @json_schema_type
 class VectorStoreListFilesResponse(BaseModel):
@@ -416,22 +489,6 @@ class VectorStoreListFilesResponse(BaseModel):
     has_more: bool = False
-@json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
-    :param file_id: Unique identifier for the file
-    :param filename: Name of the file
-    :param attributes: Key-value attributes associated with the file
-    :param content: List of content items from the file
-    """
-    file_id: str
-    filename: str
-    attributes: dict[str, Any]
-    content: list[VectorStoreContent]
 @json_schema_type
 class VectorStoreFileDeleteResponse(BaseModel):
     """Response from deleting a vector store file.
@@ -499,7 +556,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
     name: str | None = None
     file_ids: list[str] | None = None
     expires_after: dict[str, Any] | None = None
-    chunking_strategy: dict[str, Any] | None = None
+    chunking_strategy: VectorStoreChunkingStrategy | None = None
     metadata: dict[str, Any] | None = None
@@ -523,42 +580,39 @@ class VectorStoreTable(Protocol):
 @runtime_checkable
-@trace_protocol
 class VectorIO(Protocol):
     vector_store_table: VectorStoreTable | None = None
     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
+    # TODO: rename vector_store_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,
-        vector_db_id: str,
-        chunks: list[Chunk],
+        vector_store_id: str,
+        chunks: list[EmbeddedChunk],
         ttl_seconds: int | None = None,
     ) -> None:
-        """Insert chunks into a vector database.
+        """Insert embedded chunks into a vector database.
-        :param vector_db_id: The identifier of the vector database to insert the chunks into.
-        :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
-            `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
-            If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
-            If `embedding` is not provided, it will be computed later.
+        :param vector_store_id: The identifier of the vector database to insert the chunks into.
+        :param chunks: The embedded chunks to insert. Each `EmbeddedChunk` contains the content, metadata,
+            and embedding vector ready for storage.
         :param ttl_seconds: The time to live of the chunks.
         """
         ...
-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
+    # TODO: rename vector_store_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         query: InterleavedContent,
         params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
         """Query chunks from a vector database.
-        :param vector_db_id: The identifier of the vector database to query.
+        :param vector_store_id: The identifier of the vector database to query.
         :param query: The query to search for.
         :param params: The parameters of the query.
         :returns: A QueryChunksResponse.
@@ -566,7 +620,6 @@ class VectorIO(Protocol):
         ...
     # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
@@ -579,7 +632,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(
         self,
@@ -598,9 +650,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(
         self,
@@ -613,9 +662,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="POST",
@@ -638,9 +684,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="DELETE",
@@ -657,12 +700,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/search",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/search",
         method="POST",
@@ -695,12 +732,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="POST",
@@ -723,12 +754,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="GET",
@@ -755,12 +780,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="GET",
@@ -779,12 +798,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}/content",
         method="GET",
@@ -794,21 +807,19 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+        include_embeddings: Annotated[bool | None, Query()] = False,
+        include_metadata: Annotated[bool | None, Query()] = False,
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.
         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :param include_embeddings: Whether to include embedding vectors in the response.
+        :param include_metadata: Whether to include chunk metadata in the response.
+        :returns: File contents, optionally with embeddings and metadata based on query parameters.
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="POST",
@@ -829,12 +840,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="DELETE",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="DELETE",
@@ -858,12 +863,6 @@ class VectorIO(Protocol):
         method="POST",
         level=LLAMA_STACK_API_V1,
     )
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
@@ -882,12 +881,6 @@ class VectorIO(Protocol):
         method="GET",
         level=LLAMA_STACK_API_V1,
     )
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     async def openai_retrieve_vector_store_file_batch(
         self,
         batch_id: str,
@@ -901,12 +894,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
         method="GET",
@@ -935,12 +922,6 @@ class VectorIO(Protocol):
         """
         ...
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
         method="POST",

{llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py RENAMED Viewed

@@ -8,7 +8,7 @@ from typing import Literal
 from pydantic import BaseModel
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 # Internal resource type for storing the vector store routing and other information

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl