llama-stack 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/run.py +3 -0
- llama_stack/core/stack.py +56 -14
- llama_stack/providers/inline/tool_runtime/rag/memory.py +8 -3
- llama_stack/providers/inline/vector_io/faiss/faiss.py +6 -3
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +6 -2
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +9 -6
- llama_stack/providers/remote/vector_io/milvus/milvus.py +15 -12
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +14 -10
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +10 -10
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +9 -9
- llama_stack/providers/utils/memory/vector_store.py +9 -4
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/METADATA +4 -4
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/RECORD +87 -20
- llama_stack_api/llama_stack_api/__init__.py +945 -0
- llama_stack_api/llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/llama_stack_api/admin/api.py +72 -0
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/llama_stack_api/admin/models.py +113 -0
- llama_stack_api/llama_stack_api/agents.py +173 -0
- llama_stack_api/llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/llama_stack_api/batches/api.py +53 -0
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/llama_stack_api/batches/models.py +78 -0
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/llama_stack_api/common/errors.py +95 -0
- llama_stack_api/llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/llama_stack_api/common/responses.py +77 -0
- llama_stack_api/llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/llama_stack_api/connectors.py +146 -0
- llama_stack_api/llama_stack_api/conversations.py +270 -0
- llama_stack_api/llama_stack_api/datasetio.py +55 -0
- llama_stack_api/llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/llama_stack_api/datatypes.py +373 -0
- llama_stack_api/llama_stack_api/eval.py +137 -0
- llama_stack_api/llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/llama_stack_api/files/api.py +51 -0
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/llama_stack_api/files/models.py +107 -0
- llama_stack_api/llama_stack_api/inference.py +1169 -0
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/llama_stack_api/internal/kvstore.py +26 -0
- llama_stack_api/llama_stack_api/internal/sqlstore.py +79 -0
- llama_stack_api/llama_stack_api/models.py +171 -0
- llama_stack_api/llama_stack_api/openai_responses.py +1468 -0
- llama_stack_api/llama_stack_api/post_training.py +370 -0
- llama_stack_api/llama_stack_api/prompts.py +203 -0
- llama_stack_api/llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/llama_stack_api/providers/api.py +16 -0
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/llama_stack_api/providers/models.py +24 -0
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/llama_stack_api/resource.py +37 -0
- llama_stack_api/llama_stack_api/router_utils.py +160 -0
- llama_stack_api/llama_stack_api/safety.py +132 -0
- llama_stack_api/llama_stack_api/schema_utils.py +208 -0
- llama_stack_api/llama_stack_api/scoring.py +93 -0
- llama_stack_api/llama_stack_api/scoring_functions.py +211 -0
- llama_stack_api/llama_stack_api/shields.py +93 -0
- llama_stack_api/llama_stack_api/tools.py +226 -0
- llama_stack_api/llama_stack_api/vector_io.py +941 -0
- llama_stack_api/llama_stack_api/vector_stores.py +51 -0
- llama_stack_api/llama_stack_api/version.py +9 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.0.dist-info → llama_stack-0.4.2.dist-info}/top_level.txt +0 -0
llama_stack/cli/stack/run.py
CHANGED

@@ -202,6 +202,9 @@ class StackRun(Subcommand):
         # Set the config file in environment so create_app can find it
         os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
 
+        # disable together banner that spams llama stack run every time
+        os.environ["TOGETHER_NO_BANNER"] = "1"
+
         uvicorn_config = {
             "factory": True,
             "host": host,
llama_stack/core/stack.py
CHANGED

@@ -53,6 +53,7 @@ from llama_stack_api import (
     PostTraining,
     Prompts,
     Providers,
+    RegisterBenchmarkRequest,
     Safety,
     Scoring,
     ScoringFunctions,

@@ -61,6 +62,7 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
+from llama_stack_api.datasets import RegisterDatasetRequest
 
 logger = get_logger(name=__name__, category="core")
 

@@ -91,18 +93,21 @@ class LlamaStack(
     pass
 
 
+# Resources to register based on configuration.
+# If a request class is specified, the configuration object will be converted to this class before invoking the registration method.
 RESOURCES = [
-    ("models", Api.models, "register_model", "list_models"),
-    ("shields", Api.shields, "register_shield", "list_shields"),
-    ("datasets", Api.datasets, "register_dataset", "list_datasets"),
+    ("models", Api.models, "register_model", "list_models", None),
+    ("shields", Api.shields, "register_shield", "list_shields", None),
+    ("datasets", Api.datasets, "register_dataset", "list_datasets", RegisterDatasetRequest),
     (
         "scoring_fns",
         Api.scoring_functions,
        "register_scoring_function",
         "list_scoring_functions",
+        None,
     ),
-    ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks"),
-    ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups"),
+    ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
+    ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
 ]

@@ -110,6 +115,18 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
 REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
 
+# ID fields for registered resources that should trigger skipping
+# when they resolve to empty/None (from conditional env vars like :+)
+RESOURCE_ID_FIELDS = [
+    "vector_store_id",
+    "model_id",
+    "shield_id",
+    "dataset_id",
+    "scoring_fn_id",
+    "benchmark_id",
+    "toolgroup_id",
+]
+
 
 def is_request_model(t: Any) -> bool:
     """Check if a type is a request model (Pydantic BaseModel).

@@ -187,7 +204,7 @@ async def invoke_with_optional_request(method: Any) -> Any:
 
 
 async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
-    for rsrc, api, register_method, list_method in RESOURCES:
+    for rsrc, api, register_method, list_method, request_class in RESOURCES:
         objects = getattr(run_config.registered_resources, rsrc)
         if api not in impls:
             continue

@@ -201,10 +218,17 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
                 continue
             logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
 
-            # we want to maintain the type information in arguments to method.
-            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
-            # we use model_dump() to find all the attrs and then getattr to get the still typed value.
-            await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
+            # TODO: Once all register methods are migrated to accept request objects,
+            # remove this conditional and always use the request_class pattern.
+            if request_class is not None:
+                request = request_class(**obj.model_dump())
+                await method(request)
+            else:
+                # we want to maintain the type information in arguments to method.
+                # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
+                # we use model_dump() to find all the attrs and then getattr to get the still typed
+                # value.
+                await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
 
         method = getattr(impls[api], list_method)
         response = await invoke_with_optional_request(method)

@@ -346,15 +370,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                             logger.debug(
                                 f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
                             )
-                            # Create a copy with resolved provider_id but original config
-                            disabled_provider = v.copy()
-                            disabled_provider["provider_id"] = resolved_provider_id
                             continue
                 except EnvVarError:
                     # If we can't resolve the provider_id, continue with normal processing
                     pass
 
-                #
+                # Special handling for registered resources: check if ID field resolves to empty/None
+                # from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
+                if isinstance(v, dict):
+                    should_skip = False
+                    for id_field in RESOURCE_ID_FIELDS:
+                        if id_field in v:
+                            try:
+                                resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
+                                if resolved_id is None or resolved_id == "":
+                                    logger.debug(
+                                        f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
+                                    )
+                                    should_skip = True
+                                    break
+                            except EnvVarError as e:
+                                logger.warning(
+                                    f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
+                                )
+                    if should_skip:
+                        continue
+
+                # Normal processing
                 result.append(replace_env_vars(v, f"{path}[{i}]"))
             except EnvVarError as e:
                 raise EnvVarError(e.var_name, e.path) from None
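Note on the new skip rule: `${env.VAR:+value}` expands to `value` only when `VAR` is set and to the empty string otherwise, so a registered resource whose ID field uses that form drops out of the config when the variable is unset. Below is a minimal, self-contained sketch of that behavior; `expand_conditional` is a toy stand-in for the real resolution done by `replace_env_vars` above.

    import os

    RESOURCE_ID_FIELDS = [
        "vector_store_id", "model_id", "shield_id", "dataset_id",
        "scoring_fn_id", "benchmark_id", "toolgroup_id",
    ]


    def expand_conditional(value):
        # toy stand-in for ${env.VAR:+value}: "value" if VAR is set, else ""
        if isinstance(value, str) and value.startswith("${env.") and ":+" in value:
            var, _, alt = value[len("${env."):-1].partition(":+")
            return alt if os.environ.get(var) else ""
        return value


    def keep_entry(entry: dict) -> bool:
        # mirrors the added skip rule: drop entries whose ID resolves to empty/None
        for field in RESOURCE_ID_FIELDS:
            if field in entry:
                resolved = expand_conditional(entry[field])
                if resolved is None or resolved == "":
                    return False
        return True


    # With MY_VS unset in the environment, this entry is skipped, not registered:
    print(keep_entry({"vector_store_id": "${env.MY_VS:+my-store}", "provider_id": "faiss"}))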
llama_stack/providers/inline/tool_runtime/rag/memory.py
CHANGED

@@ -50,8 +50,11 @@ log = get_logger(name=__name__, category="tool_runtime")
 async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     """Get raw binary data and mime type from a RAGDocument for file upload."""
     if isinstance(doc.content, URL):
-        if doc.content.uri.startswith("data:"):
-            parts = parse_data_url(doc.content.uri)
+        uri = doc.content.uri
+        if uri.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        if uri.startswith("data:"):
+            parts = parse_data_url(uri)
             mime_type = parts["mimetype"]
             data = parts["data"]

@@ -63,7 +66,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
             return file_data, mime_type
         else:
             async with httpx.AsyncClient() as client:
-                r = await client.get(doc.content.uri)
+                r = await client.get(uri)
                 r.raise_for_status()
                 mime_type = r.headers.get("content-type", "application/octet-stream")
                 return r.content, mime_type

@@ -73,6 +76,8 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     else:
         content_str = interleaved_content_as_str(doc.content)
 
+        if content_str.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
         if content_str.startswith("data:"):
             parts = parse_data_url(content_str)
             mime_type = parts["mimetype"]
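Since file:// URIs are now rejected, local documents have to go through the Files API first. A hedged sketch of such an upload, assuming a local server at http://localhost:8321 and an OpenAI-compatible multipart /v1/files endpoint; the "file" and "purpose" field names follow that convention and are not shown in this diff.

    import httpx


    def upload_document(path: str, base_url: str = "http://localhost:8321") -> str:
        with open(path, "rb") as f:
            resp = httpx.post(
                f"{base_url}/v1/files",
                files={"file": f},
                data={"purpose": "assistants"},
            )
        resp.raise_for_status()
        return resp.json()["id"]  # use this file id instead of a file:// URI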
llama_stack/providers/inline/vector_io/faiss/faiss.py
CHANGED

@@ -18,6 +18,7 @@ from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -72,9 +73,11 @@ class FaissIndex(EmbeddingIndex):
 
         if stored_data:
             data = json.loads(stored_data)
-            self.chunk_by_index = {
-                int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
-            }
+            self.chunk_by_index = {}
+            for k, v in data["chunk_by_index"].items():
+                chunk_data = json.loads(v)
+                # Use generic backward compatibility utility
+                self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
 
         buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
         try:
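The new helper load_embedded_chunk_with_backward_compat lives in the added vector_utils.py, whose body is not shown in this diff. A plausible sketch of what such a loader could do, assuming older serialized chunks may lack the embedding key that EmbeddedChunk now carries; the legacy key name below is hypothetical.

    from typing import Any

    from llama_stack_api import EmbeddedChunk


    def load_embedded_chunk_with_backward_compat(data: dict[str, Any]) -> EmbeddedChunk:
        # hypothetical: older dumps may have stored the vector under a legacy key
        if "embedding" not in data and "embeddings" in data:
            data = {**data, "embedding": data.pop("embeddings")}
        return EmbeddedChunk(**data)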
llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
CHANGED

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import asyncio
+import json
 import re
 import sqlite3
 import struct

@@ -23,6 +24,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,

@@ -235,7 +237,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score < score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue

@@ -276,7 +279,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score > -score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue
llama_stack/providers/remote/vector_io/chroma/chroma.py
CHANGED

@@ -17,6 +17,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,

@@ -60,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
+
+        # Extract embeddings directly from chunks (already list[float])
+        embeddings = [chunk.embedding for chunk in chunks]
 
         ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
         await maybe_await(

@@ -84,7 +87,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             try:
                 doc = json.loads(doc)
-                chunk = EmbeddedChunk(**doc)
+                chunk = load_embedded_chunk_with_backward_compat(doc)
             except Exception:
                 log.exception(f"Failed to parse document: {doc}")
                 continue

@@ -139,7 +142,7 @@ class ChromaIndex(EmbeddingIndex):
 
         for dist, doc in zip(distances, documents, strict=False):
             doc_data = json.loads(doc)
-            chunk = EmbeddedChunk(**doc_data)
+            chunk = load_embedded_chunk_with_backward_compat(doc_data)
 
             score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
 
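The same reshape repeats in the milvus, pgvector, qdrant, and weaviate hunks below: add_chunks() drops its separate embeddings array because each EmbeddedChunk now carries its own embedding as list[float]. A sketch of the new calling convention; the constructor fields are inferred from the attributes used in these hunks (chunk_id, content, metadata, embedding), not taken from the model definition itself.

    from llama_stack_api import EmbeddedChunk

    chunks = [
        EmbeddedChunk(
            chunk_id="doc-1:0",
            content="first chunk of text",
            metadata={"document_id": "doc-1"},
            embedding=[0.12, -0.07, 0.33],  # one precomputed vector per chunk
        )
    ]

    # 0.4.0 (per the removed lines):  await index.add_chunks(chunks, embeddings)
    # 0.4.2 (per the added lines):    await index.add_chunks(chunks)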
llama_stack/providers/remote/vector_io/milvus/milvus.py
CHANGED

@@ -21,7 +21,10 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
-from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -39,6 +42,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
 
 logger = get_logger(name=__name__, category="vector_io::milvus")
 
+
 VERSION = "v3"
 VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
 VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"

@@ -65,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
         if await asyncio.to_thread(self.client.has_collection, self.collection_name):
             await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
             logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")

@@ -81,7 +84,7 @@ class MilvusIndex(EmbeddingIndex):
                 max_length=65535,
                 enable_analyzer=True,  # Enable text analysis for BM25
             )
-            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
+            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
             schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
             # Add sparse vector field for BM25 (required by the function)
             schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

@@ -110,12 +113,12 @@ class MilvusIndex(EmbeddingIndex):
             )
 
         data = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data.append(
                 {
                     "chunk_id": chunk.chunk_id,
                     "content": chunk.content,
-                    "vector": embedding,
+                    "vector": chunk.embedding,  # Already a list[float]
                     "chunk_content": chunk.model_dump(),
                     # sparse field will be handled by BM25 function automatically
                 }

@@ -136,7 +139,7 @@ class MilvusIndex(EmbeddingIndex):
             output_fields=["*"],
             search_params={"params": {"radius": score_threshold}},
         )
-        chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
+        chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
         scores = [res["distance"] for res in search_res[0]]
         return QueryChunksResponse(chunks=chunks, scores=scores)
 

@@ -163,7 +166,7 @@ class MilvusIndex(EmbeddingIndex):
             chunks = []
             scores = []
             for res in search_res[0]:
-                chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
                 chunks.append(chunk)
                 scores.append(res["distance"])  # BM25 score from Milvus
 

@@ -191,7 +194,7 @@ class MilvusIndex(EmbeddingIndex):
                 output_fields=["*"],
                 limit=k,
             )
-            chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
+            chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
             scores = [1.0] * len(chunks)  # Simple binary score for text search
             return QueryChunksResponse(chunks=chunks, scores=scores)
 

@@ -243,7 +246,7 @@ class MilvusIndex(EmbeddingIndex):
             chunks = []
             scores = []
             for res in search_res[0]:
-                chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
                 chunks.append(chunk)
                 scores.append(res["distance"])
 
llama_stack/providers/remote/vector_io/pgvector/pgvector.py
CHANGED

@@ -18,7 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
-from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    WeightedInMemoryAggregator,
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -130,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
             log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
             raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         values = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             content_text = interleaved_content_as_str(chunk.content)
             values.append(
                 (
                     f"{chunk.chunk_id}",
                     Json(chunk.model_dump()),
-                    embedding.tolist(),
+                    chunk.embedding,  # Already a list[float]
                     content_text,
                     content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                 )

@@ -194,7 +197,7 @@ class PGVectorIndex(EmbeddingIndex):
             score = 1.0 / float(dist) if dist != 0 else float("inf")
             if score < score_threshold:
                 continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
             scores.append(score)
 
         return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -230,7 +233,7 @@ class PGVectorIndex(EmbeddingIndex):
         for doc, score in results:
             if score < score_threshold:
                 continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
             scores.append(float(score))
 
         return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -306,7 +309,8 @@ class PGVectorIndex(EmbeddingIndex):
         """Remove a chunk from the PostgreSQL table."""
         chunk_ids = [c.chunk_id for c in chunks_for_deletion]
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", chunk_ids)
+            # Fix: Use proper tuple parameter binding with explicit array cast
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
 
     def get_pgvector_search_function(self) -> str:
         return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
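Why the ANY(%s::text[]) form works: psycopg2 adapts a Python list to a PostgreSQL ARRAY, and the explicit ::text[] cast pins the array's element type so the id comparison against a text column is unambiguous. A minimal standalone sketch, with an illustrative table name and assuming an open psycopg2 connection conn:

    chunk_ids = ["doc-1:0", "doc-1:1"]
    with conn.cursor() as cur:
        # the single tuple parameter binds the whole list as ARRAY['doc-1:0', 'doc-1:1']
        cur.execute(
            "DELETE FROM vector_store_chunks WHERE id = ANY(%s::text[])",
            (chunk_ids,),
        )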
llama_stack/providers/remote/vector_io/qdrant/qdrant.py
CHANGED

@@ -18,6 +18,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,

@@ -66,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
         # If the collection does not exist, it will be created in add_chunks.
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         if not await self.client.collection_exists(self.collection_name):
             await self.client.create_collection(
                 self.collection_name,
-                vectors_config=models.VectorParams(size=len(embeddings[0]), distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
             )
 
         points = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             chunk_id = chunk.chunk_id
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),
-                    vector=embedding,
+                    vector=chunk.embedding,  # Already a list[float]
                     payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
                 )
             )

@@ -118,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
             assert point.payload is not None
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 log.exception("Failed to parse chunk")
                 continue

@@ -172,7 +172,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")

@@ -242,7 +242,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
 
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")
llama_stack/providers/remote/vector_io/weaviate/weaviate.py
CHANGED

@@ -22,6 +22,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
 from llama_stack_api import (
     EmbeddedChunk,

@@ -57,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         data_objects = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data_objects.append(
                 wvc.data.DataObject(
                     properties={
                         "chunk_id": chunk.chunk_id,
                         "chunk_content": chunk.model_dump_json(),
                     },
-                    vector=embedding
+                    vector=chunk.embedding,  # Already a list[float]
                 )
             )
 

@@ -116,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue

@@ -176,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue

@@ -245,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
llama_stack/providers/utils/memory/vector_store.py
CHANGED

@@ -135,15 +135,20 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
 
 async def content_from_doc(doc: RAGDocument) -> str:
     if isinstance(doc.content, URL):
-        if doc.content.uri.startswith("data:"):
-            return content_from_data(doc.content.uri)
+        uri = doc.content.uri
+        if uri.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        if uri.startswith("data:"):
+            return content_from_data(uri)
         async with httpx.AsyncClient() as client:
-            r = await client.get(doc.content.uri)
+            r = await client.get(uri)
             if doc.mime_type == "application/pdf":
                 return parse_pdf(r.content)
             return r.text
     elif isinstance(doc.content, str):
-        pattern = re.compile("^(https?://|file://|data:)")
+        if doc.content.startswith("file://"):
+            raise ValueError("file:// URIs are not supported. Please use the Files API (/v1/files) to upload files.")
+        pattern = re.compile("^(https?://|data:)")
         if pattern.match(doc.content):
             if doc.content.startswith("data:"):
                 return content_from_data(doc.content)
llama_stack/providers/utils/vector_io/__init__.py
CHANGED

@@ -3,3 +3,19 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]