PyPI - llama-stack - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (458) hide show

llama_stack/providers/inline/tool_runtime/rag/memory.py CHANGED Viewed

@@ -14,35 +14,32 @@ import httpx
 from fastapi import UploadFile
 from pydantic import TypeAdapter
-from llama_stack.apis.common.content_types import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
+from llama_stack_api import (
     URL,
+    Files,
+    Inference,
     InterleavedContent,
     InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.files import Files, OpenAIFilePurpose
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.tools import (
     ListToolDefsResponse,
+    OpenAIFilePurpose,
+    QueryChunksResponse,
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,
-    RAGToolRuntime,
+    TextContentItem,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
-)
-from llama_stack.apis.vector_io import (
-    QueryChunksResponse,
+    UploadFileRequest,
     VectorIO,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
@@ -91,7 +88,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
             return content_str.encode("utf-8"), "text/plain"
-class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
+class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
     def __init__(
         self,
         config: RagToolRuntimeConfig,
@@ -119,9 +116,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
     async def insert(
         self,
         documents: list[RAGDocument],
-        vector_db_id: str,
-        chunk_size_in_tokens: int = 512,
+        vector_store_id: str,
+        chunk_size_in_tokens: int | None = None,
     ) -> None:
+        if chunk_size_in_tokens is None:
+            chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
         if not documents:
             return
@@ -143,29 +142,31 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                 try:
                     created_file = await self.files_api.openai_upload_file(
-                        file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+                        request=UploadFileRequest(purpose=OpenAIFilePurpose.ASSISTANTS),
+                        file=upload_file,
                     )
                 except Exception as e:
                     log.error(f"Failed to upload file for document {doc.document_id}: {e}")
                     continue
+                overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens
                 chunking_strategy = VectorStoreChunkingStrategyStatic(
                     static=VectorStoreChunkingStrategyStaticConfig(
                         max_chunk_size_tokens=chunk_size_in_tokens,
-                        chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                        chunk_overlap_tokens=overlap_tokens,
                     )
                 )
                 try:
                     await self.vector_io_api.openai_attach_file_to_vector_store(
-                        vector_store_id=vector_db_id,
+                        vector_store_id=vector_store_id,
                         file_id=created_file.id,
                         attributes=doc.metadata,
                         chunking_strategy=chunking_strategy,
                     )
                 except Exception as e:
                     log.error(
-                        f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                        f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
                     )
                     continue
@@ -176,15 +177,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
     async def query(
         self,
         content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
         query_config: RAGQueryConfig | None = None,
     ) -> RAGQueryResult:
-        if not vector_db_ids:
+        if not vector_store_ids:
             raise ValueError(
                 "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
             )
-        query_config = query_config or RAGQueryConfig()
+        query_config = query_config or RAGQueryConfig(
+            max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+        )
         query = await generate_rag_query(
             query_config.query_generator_config,
             content,
@@ -192,7 +195,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         )
         tasks = [
             self.vector_io_api.query_chunks(
-                vector_db_id=vector_db_id,
+                vector_store_id=vector_store_id,
                 query=query,
                 params={
                     "mode": query_config.mode,
@@ -201,18 +204,20 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                     "ranker": query_config.ranker,
                 },
             )
-            for vector_db_id in vector_db_ids
+            for vector_store_id in vector_store_ids
         ]
         results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
         chunks = []
         scores = []
-        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
-            for chunk, score in zip(result.chunks, result.scores, strict=False):
-                if not hasattr(chunk, "metadata") or chunk.metadata is None:
+        for vector_store_id, result in zip(vector_store_ids, results, strict=False):
+            for embedded_chunk, score in zip(result.chunks, result.scores, strict=False):
+                # EmbeddedChunk inherits from Chunk, so use it directly
+                chunk = embedded_chunk
+                if chunk.metadata is None:
                     chunk.metadata = {}
-                chunk.metadata["vector_db_id"] = vector_db_id
+                chunk.metadata["vector_store_id"] = vector_store_id
                 chunks.append(chunk)
                 scores.append(score)
@@ -225,13 +230,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         chunks = chunks[: query_config.max_chunks]
         tokens = 0
-        picked: list[InterleavedContentItem] = [
-            TextContentItem(
-                text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
-            )
-        ]
-        for i, chunk in enumerate(chunks):
-            metadata = chunk.metadata
+        # Get templates from vector stores config
+        vector_stores_config = self.config.vector_stores_config
+        header_template = vector_stores_config.file_search_params.header_template
+        footer_template = vector_stores_config.file_search_params.footer_template
+        chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
+        context_template = vector_stores_config.context_prompt_params.context_template
+        picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
+        for i, embedded_chunk in enumerate(chunks):
+            metadata = embedded_chunk.metadata
             tokens += metadata.get("token_count", 0)
             tokens += metadata.get("metadata_token_count", 0)
@@ -250,22 +259,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
             metadata_keys_to_exclude_from_context = [
                 "token_count",
                 "metadata_token_count",
-                "vector_db_id",
+                "vector_store_id",
             ]
             metadata_for_context = {}
             for k in chunk_metadata_keys_to_include_from_context:
-                metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
+                metadata_for_context[k] = getattr(embedded_chunk.chunk_metadata, k)
             for k in metadata:
                 if k not in metadata_keys_to_exclude_from_context:
                     metadata_for_context[k] = metadata[k]
-            text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
+            text_content = chunk_template.format(index=i + 1, chunk=embedded_chunk, metadata=metadata_for_context)
             picked.append(TextContentItem(text=text_content))
-        picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+        picked.append(TextContentItem(text=footer_template))
         picked.append(
             TextContentItem(
-                text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n',
+                text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
             )
         )
@@ -275,12 +284,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                 "document_ids": [c.document_id for c in chunks[: len(picked)]],
                 "chunks": [c.content for c in chunks[: len(picked)]],
                 "scores": scores[: len(picked)],
-                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
+                "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
             },
         )
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         # Parameters are not listed since these methods are not yet invoked automatically
         # by the LLM. The method is only implemented so things like /tools can list without
@@ -308,18 +320,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
             ]
         )
-    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
-        vector_db_ids = kwargs.get("vector_db_ids", [])
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
+        vector_store_ids = kwargs.get("vector_store_ids", [])
         query_config = kwargs.get("query_config")
         if query_config:
             query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
         else:
-            query_config = RAGQueryConfig()
+            query_config = RAGQueryConfig(
+                max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+            )
         query = kwargs["query"]
         result = await self.query(
             content=query,
-            vector_db_ids=vector_db_ids,
+            vector_store_ids=vector_store_ids,
             query_config=query_config,
         )

llama_stack/providers/inline/vector_io/chroma/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 from .config import ChromaVectorIOConfig

llama_stack/providers/inline/vector_io/chroma/config.py CHANGED Viewed

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 @json_schema_type

llama_stack/providers/inline/vector_io/faiss/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 from .config import FaissVectorIOConfig

llama_stack/providers/inline/vector_io/faiss/config.py CHANGED Viewed

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 @json_schema_type

llama_stack/providers/inline/vector_io/faiss/faiss.py CHANGED Viewed

@@ -10,21 +10,28 @@ import io
 import json
 from typing import Any
-import faiss
+import faiss  # type: ignore[import-untyped]
 import numpy as np
 from numpy.typing import NDArray
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 from .config import FaissVectorIOConfig
@@ -41,7 +48,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten
 class FaissIndex(EmbeddingIndex):
     def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
         self.index = faiss.IndexFlatL2(dimension)
-        self.chunk_by_index: dict[int, Chunk] = {}
+        self.chunk_by_index: dict[int, EmbeddedChunk] = {}
         self.kvstore = kvstore
         self.bank_id = bank_id
@@ -65,12 +72,14 @@ class FaissIndex(EmbeddingIndex):
         if stored_data:
             data = json.loads(stored_data)
-            self.chunk_by_index = {int(k): Chunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()}
+            self.chunk_by_index = {
+                int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
+            }
             buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
             try:
                 self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
-                self.chunk_ids = [chunk.chunk_id for chunk in self.chunk_by_index.values()]
+                self.chunk_ids = [embedded_chunk.chunk_id for embedded_chunk in self.chunk_by_index.values()]
             except Exception as e:
                 logger.debug(e, exc_info=True)
                 raise ValueError(
@@ -100,19 +109,24 @@ class FaissIndex(EmbeddingIndex):
         await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}")
-    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
-        # Add dimension check
+    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
+        if not embedded_chunks:
+            return
+        # Extract embeddings and validate dimensions
+        embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
         embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0]
         if embedding_dim != self.index.d:
             raise ValueError(f"Embedding dimension mismatch. Expected {self.index.d}, got {embedding_dim}")
+        # Store chunks by index
         indexlen = len(self.chunk_by_index)
-        for i, chunk in enumerate(chunks):
-            self.chunk_by_index[indexlen + i] = chunk
+        for i, embedded_chunk in enumerate(embedded_chunks):
+            self.chunk_by_index[indexlen + i] = embedded_chunk
         async with self.chunk_id_lock:
-            self.index.add(np.array(embeddings).astype(np.float32))
-            self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])
+            self.index.add(embeddings)
+            self.chunk_ids.extend([ec.chunk_id for ec in embedded_chunks])  # EmbeddedChunk inherits from Chunk
         # Save updated index
         await self._save_index()
@@ -144,8 +158,8 @@ class FaissIndex(EmbeddingIndex):
     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
-        chunks = []
-        scores = []
+        chunks: list[EmbeddedChunk] = []
+        scores: list[float] = []
         for d, i in zip(distances[0], indices[0], strict=False):
             if i < 0:
                 continue
@@ -178,9 +192,8 @@ class FaissIndex(EmbeddingIndex):
 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
     def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.cache: dict[str, VectorStoreWithIndex] = {}
     async def initialize(self) -> None:
@@ -271,19 +284,21 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
         self.cache[vector_store_id] = index
         return index
-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = self.cache.get(vector_db_id)
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
+            raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")
         await index.insert_chunks(chunks)
     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = self.cache.get(vector_db_id)
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         return await index.query_chunks(query, params)

llama_stack/providers/inline/vector_io/milvus/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 from .config import MilvusVectorIOConfig

llama_stack/providers/inline/vector_io/milvus/config.py CHANGED Viewed

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 @json_schema_type

llama_stack/providers/inline/vector_io/qdrant/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 from .config import QdrantVectorIOConfig

llama_stack/providers/inline/vector_io/qdrant/config.py CHANGED Viewed

@@ -10,7 +10,7 @@ from typing import Any
 from pydantic import BaseModel
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 @json_schema_type

llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 from .config import SQLiteVectorIOConfig

llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py CHANGED Viewed

@@ -11,18 +11,11 @@ import struct
 from typing import Any
 import numpy as np
-import sqlite_vec
+import sqlite_vec  # type: ignore[import-untyped]
 from numpy.typing import NDArray
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
@@ -31,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 logger = get_logger(name=__name__, category="vector_io")
@@ -137,14 +141,16 @@ class SQLiteVecIndex(EmbeddingIndex):
         await asyncio.to_thread(_drop_tables)
-    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500):
+    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk], batch_size: int = 500):
         """
-        Add new chunks along with their embeddings using batch inserts.
-        For each chunk, we insert its JSON into the metadata table and then insert its
+        Add new embedded chunks using batch inserts.
+        For each embedded chunk, we insert the chunk JSON into the metadata table and then insert its
         embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
         If any insert fails, the transaction is rolled back to maintain consistency.
         Also inserts chunk content into FTS table for keyword search support.
         """
+        chunks = embedded_chunks  # EmbeddedChunk now inherits from Chunk
+        embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
         assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"
         def _execute_all_batch_inserts():
@@ -229,11 +235,11 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score < score_threshold:
                 continue
             try:
-                chunk = Chunk.model_validate_json(chunk_json)
+                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue
-            chunks.append(chunk)
+            chunks.append(embedded_chunk)
             scores.append(score)
         return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -270,11 +276,11 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score > -score_threshold:
                 continue
             try:
-                chunk = Chunk.model_validate_json(chunk_json)
+                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue
-            chunks.append(chunk)
+            chunks.append(embedded_chunk)
             scores.append(score)
         return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -308,13 +314,14 @@ class SQLiteVecIndex(EmbeddingIndex):
         vector_response = await self.query_vector(embedding, k, score_threshold)
         keyword_response = await self.query_keyword(query_string, k, score_threshold)
-        # Convert responses to score dictionaries using chunk_id
+        # Convert responses to score dictionaries using chunk_id (EmbeddedChunk inherits from Chunk)
         vector_scores = {
-            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+            embedded_chunk.chunk_id: score
+            for embedded_chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
         }
         keyword_scores = {
-            chunk.chunk_id: score
-            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+            embedded_chunk.chunk_id: score
+            for embedded_chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }
         # Combine scores using the reranking utility
@@ -329,10 +336,10 @@ class SQLiteVecIndex(EmbeddingIndex):
         # Filter by score threshold
         filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
-        # Create a map of chunk_id to chunk for both responses
-        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+        # Create a map of chunk_id to embedded_chunk for both responses
+        chunk_map = {ec.chunk_id: ec for ec in vector_response.chunks + keyword_response.chunks}
-        # Use the map to look up chunks by their IDs
+        # Use the map to look up embedded chunks by their IDs
         chunks = []
         scores = []
         for doc_id, score in filtered_items:
@@ -382,9 +389,8 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
     """
     def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.cache: dict[str, VectorStoreWithIndex] = {}
         self.vector_store_table = None
@@ -458,20 +464,21 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]
-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
-        # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
-        # and then call our index's add_chunks.
+            raise VectorStoreNotFoundError(vector_store_id)
+        # The VectorStoreWithIndex helper validates embeddings and calls the index's add_chunks method
         await index.insert_chunks(chunks)
     async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         return await index.query_chunks(query, params)
     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl