PyPI - llama-stack - Versions diffs - 0.4.0__tar.gz → 0.4.1__tar.gz - Mend

llama-stack 0.4.0 (tar.gz) → 0.4.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (597)

{llama_stack-0.4.0/src/llama_stack.egg-info → llama_stack-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.0
+Version: 0.4.1
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -17,7 +17,7 @@ Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: PyYAML>=6.0
-Requires-Dist: aiohttp
+Requires-Dist: aiohttp>=3.13.3
 Requires-Dist: fastapi<1.0,>=0.115.0
 Requires-Dist: fire
 Requires-Dist: httpx
@@ -44,9 +44,9 @@ Requires-Dist: sqlalchemy[asyncio]>=2.0.41
 Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
-Requires-Dist: urllib3>=2.6.0
+Requires-Dist: urllib3>=2.6.3
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.0; extra == "client"
+Requires-Dist: llama-stack-client==0.4.1; extra == "client"
 Dynamic: license-file
 # Llama Stack

{llama_stack-0.4.0 → llama_stack-0.4.1}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
 [project]
 name = "llama_stack"
-version = "0.4.0"
+version = "0.4.1"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -25,7 +25,7 @@ classifiers = [
 ]
 dependencies = [
     "PyYAML>=6.0",
-    "aiohttp",
+    "aiohttp>=3.13.3",
     "fastapi>=0.115.0,<1.0",                          # server
     "fire",                                           # for MCP in LLS client
     "httpx",
@@ -52,12 +52,12 @@ dependencies = [
     "starlette>=0.49.1",
     "psycopg2-binary",
     "tornado>=6.5.3",
-    "urllib3>=2.6.0",
+    "urllib3>=2.6.3",
 ]
 [project.optional-dependencies]
 client = [
-    "llama-stack-client==0.4.0",
+    "llama-stack-client==0.4.1",
 ]
 [dependency-groups]
@@ -106,7 +106,7 @@ type_checking = [
     "lm-format-enforcer",
     "mcp",
     "ollama",
-    "llama-stack-client==0.4.0",
+    "llama-stack-client==0.4.1",
 ]
 # These are the dependencies required for running unit tests.
 unit = [

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/core/stack.py RENAMED Viewed

@@ -110,6 +110,18 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
 REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
+# ID fields for registered resources that should trigger skipping
+# when they resolve to empty/None (from conditional env vars like :+)
+RESOURCE_ID_FIELDS = [
+    "vector_store_id",
+    "model_id",
+    "shield_id",
+    "dataset_id",
+    "scoring_fn_id",
+    "benchmark_id",
+    "toolgroup_id",
+]
 def is_request_model(t: Any) -> bool:
     """Check if a type is a request model (Pydantic BaseModel).
@@ -346,15 +358,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                             logger.debug(
                                 f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
                             )
-                            # Create a copy with resolved provider_id but original config
-                            disabled_provider = v.copy()
-                            disabled_provider["provider_id"] = resolved_provider_id
                             continue
                     except EnvVarError:
                         # If we can't resolve the provider_id, continue with normal processing
                         pass
-                # Normal processing for non-disabled providers
+                # Special handling for registered resources: check if ID field resolves to empty/None
+                # from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
+                if isinstance(v, dict):
+                    should_skip = False
+                    for id_field in RESOURCE_ID_FIELDS:
+                        if id_field in v:
+                            try:
+                                resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
+                                if resolved_id is None or resolved_id == "":
+                                    logger.debug(
+                                        f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
+                                    )
+                                    should_skip = True
+                                    break
+                            except EnvVarError as e:
+                                logger.warning(
+                                    f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
+                                )
+                    if should_skip:
+                        continue
+                # Normal processing
                 result.append(replace_env_vars(v, f"{path}[{i}]"))
             except EnvVarError as e:
                 raise EnvVarError(e.var_name, e.path) from None

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/inline/vector_io/faiss/faiss.py RENAMED Viewed

@@ -18,6 +18,7 @@ from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -72,9 +73,11 @@ class FaissIndex(EmbeddingIndex):
         if stored_data:
             data = json.loads(stored_data)
-            self.chunk_by_index = {
-                int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
-            }
+            self.chunk_by_index = {}
+            for k, v in data["chunk_by_index"].items():
+                chunk_data = json.loads(v)
+                # Use generic backward compatibility utility
+                self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
             buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
             try:

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py RENAMED Viewed

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 import asyncio
+import json
 import re
 import sqlite3
 import struct
@@ -23,6 +24,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,
@@ -235,7 +237,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score < score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue
@@ -276,7 +279,8 @@ class SQLiteVecIndex(EmbeddingIndex):
             if score > -score_threshold:
                 continue
             try:
-                embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                chunk_data = json.loads(chunk_json)
+                embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
             except Exception as e:
                 logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                 continue

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/registry/agents.py RENAMED Viewed

@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
             provider_type="inline::meta-reference",
             pip_packages=[
                 "matplotlib",
+                "fonttools>=4.60.2",
                 "pillow",
                 "pandas",
                 "scikit-learn",

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/remote/vector_io/chroma/chroma.py RENAMED Viewed

@@ -17,6 +17,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,
@@ -60,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
     async def initialize(self):
         pass
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
+        # Extract embeddings directly from chunks (already list[float])
+        embeddings = [chunk.embedding for chunk in chunks]
         ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
         await maybe_await(
@@ -84,7 +87,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             try:
                 doc = json.loads(doc)
-                chunk = EmbeddedChunk(**doc)
+                chunk = load_embedded_chunk_with_backward_compat(doc)
             except Exception:
                 log.exception(f"Failed to parse document: {doc}")
                 continue
@@ -139,7 +142,7 @@ class ChromaIndex(EmbeddingIndex):
         for dist, doc in zip(distances, documents, strict=False):
             doc_data = json.loads(doc)
-            chunk = EmbeddedChunk(**doc_data)
+            chunk = load_embedded_chunk_with_backward_compat(doc_data)
             score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/remote/vector_io/milvus/milvus.py RENAMED Viewed

@@ -21,7 +21,10 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
-from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -39,6 +42,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
 logger = get_logger(name=__name__, category="vector_io::milvus")
 VERSION = "v3"
 VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
 VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
@@ -65,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
         if await asyncio.to_thread(self.client.has_collection, self.collection_name):
             await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
         if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
             logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
@@ -81,7 +84,7 @@ class MilvusIndex(EmbeddingIndex):
                 max_length=65535,
                 enable_analyzer=True,  # Enable text analysis for BM25
             )
-            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
+            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
             schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
             # Add sparse vector field for BM25 (required by the function)
             schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
@@ -110,12 +113,12 @@ class MilvusIndex(EmbeddingIndex):
             )
         data = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data.append(
                 {
                     "chunk_id": chunk.chunk_id,
                     "content": chunk.content,
-                    "vector": embedding,
+                    "vector": chunk.embedding,  # Already a list[float]
                     "chunk_content": chunk.model_dump(),
                     # sparse field will be handled by BM25 function automatically
                 }
@@ -136,7 +139,7 @@ class MilvusIndex(EmbeddingIndex):
             output_fields=["*"],
             search_params={"params": {"radius": score_threshold}},
         )
-        chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
+        chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
         scores = [res["distance"] for res in search_res[0]]
         return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -163,7 +166,7 @@ class MilvusIndex(EmbeddingIndex):
             chunks = []
             scores = []
             for res in search_res[0]:
-                chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
                 chunks.append(chunk)
                 scores.append(res["distance"])  # BM25 score from Milvus
@@ -191,7 +194,7 @@ class MilvusIndex(EmbeddingIndex):
             output_fields=["*"],
             limit=k,
         )
-        chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
+        chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
         scores = [1.0] * len(chunks)  # Simple binary score for text search
         return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -243,7 +246,7 @@ class MilvusIndex(EmbeddingIndex):
         chunks = []
         scores = []
         for res in search_res[0]:
-            chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+            chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
             chunks.append(chunk)
             scores.append(res["distance"])

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py RENAMED Viewed

@@ -18,7 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
-from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    WeightedInMemoryAggregator,
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -130,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
             log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
             raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
         values = []
-        for i, chunk in enumerate(chunks):
+        for chunk in chunks:
             content_text = interleaved_content_as_str(chunk.content)
             values.append(
                 (
                     f"{chunk.chunk_id}",
                     Json(chunk.model_dump()),
-                    embeddings[i].tolist(),
+                    chunk.embedding,  # Already a list[float]
                     content_text,
                     content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                 )
@@ -194,7 +197,7 @@ class PGVectorIndex(EmbeddingIndex):
                 score = 1.0 / float(dist) if dist != 0 else float("inf")
                 if score < score_threshold:
                     continue
-                chunks.append(EmbeddedChunk(**doc))
+                chunks.append(load_embedded_chunk_with_backward_compat(doc))
                 scores.append(score)
             return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -230,7 +233,7 @@ class PGVectorIndex(EmbeddingIndex):
             for doc, score in results:
                 if score < score_threshold:
                     continue
-                chunks.append(EmbeddedChunk(**doc))
+                chunks.append(load_embedded_chunk_with_backward_compat(doc))
                 scores.append(float(score))
             return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -306,7 +309,8 @@ class PGVectorIndex(EmbeddingIndex):
         """Remove a chunk from the PostgreSQL table."""
         chunk_ids = [c.chunk_id for c in chunks_for_deletion]
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids))
+            # Fix: Use proper tuple parameter binding with explicit array cast
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
     def get_pgvector_search_function(self) -> str:
         return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py RENAMED Viewed

@@ -18,6 +18,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -66,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
         # If the collection does not exist, it will be created in add_chunks.
         pass
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
         if not await self.client.collection_exists(self.collection_name):
             await self.client.create_collection(
                 self.collection_name,
-                vectors_config=models.VectorParams(size=len(embeddings[0]), distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
             )
         points = []
-        for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
+        for chunk in chunks:
             chunk_id = chunk.chunk_id
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),
-                    vector=embedding,
+                    vector=chunk.embedding,  # Already a list[float]
                     payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
                 )
             )
@@ -118,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
             assert point.payload is not None
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 log.exception("Failed to parse chunk")
                 continue
@@ -172,7 +172,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")
@@ -242,7 +242,7 @@ class QdrantIndex(EmbeddingIndex):
                 raise RuntimeError("Qdrant query returned point with no payload")
             try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
             except Exception:
                 chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                 point_id = getattr(point, "id", "unknown")

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py RENAMED Viewed

@@ -22,6 +22,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
 from llama_stack_api import (
     EmbeddedChunk,
@@ -57,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
     async def initialize(self):
         pass
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
         data_objects = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data_objects.append(
                 wvc.data.DataObject(
                     properties={
                         "chunk_id": chunk.chunk_id,
                         "chunk_content": chunk.model_dump_json(),
                     },
-                    vector=embedding.tolist(),
+                    vector=chunk.embedding,  # Already a list[float]
                 )
             )
@@ -116,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -176,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -245,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = EmbeddedChunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue

llama_stack-0.4.1/src/llama_stack/providers/utils/vector_io/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack/providers/utils/vector_io/vector_utils.py RENAMED Viewed

@@ -7,6 +7,9 @@
 import hashlib
 import re
 import uuid
+from typing import Any
+from llama_stack_api import EmbeddedChunk
 def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
@@ -154,3 +157,36 @@ class WeightedInMemoryAggregator:
             # Default to RRF for None, RRF, or any unknown types
             impact_factor = reranker_params.get("impact_factor", 60.0)
             return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
+def load_embedded_chunk_with_backward_compat(
+    chunk_data: dict[str, Any],
+) -> EmbeddedChunk:
+    """
+    Load EmbeddedChunk data with backward compatibility for legacy field locations.
+    Handles migration from old format where embedding_model and embedding_dimension
+    were stored in chunk_metadata to current top-level format.
+    Args:
+        chunk_data: Dictionary containing chunk data to load
+    Returns:
+        EmbeddedChunk object with migrated data
+    """
+    # Migrate old data: extract embedding_model/embedding_dimension from chunk_metadata if missing
+    if "embedding_model" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_model"] = chunk_metadata.get("chunk_embedding_model", "unknown")
+    if "embedding_dimension" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_dimension"] = chunk_metadata.get(
+            "chunk_embedding_dimension", len(chunk_data.get("embedding", []))
+        )
+    # Ensure embedding field exists (required by EmbeddedChunk)
+    if "embedding" not in chunk_data:
+        chunk_data["embedding"] = []
+    return EmbeddedChunk(**chunk_data)

{llama_stack-0.4.0 → llama_stack-0.4.1/src/llama_stack.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.0
+Version: 0.4.1
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -17,7 +17,7 @@ Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: PyYAML>=6.0
-Requires-Dist: aiohttp
+Requires-Dist: aiohttp>=3.13.3
 Requires-Dist: fastapi<1.0,>=0.115.0
 Requires-Dist: fire
 Requires-Dist: httpx
@@ -44,9 +44,9 @@ Requires-Dist: sqlalchemy[asyncio]>=2.0.41
 Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
-Requires-Dist: urllib3>=2.6.0
+Requires-Dist: urllib3>=2.6.3
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.0; extra == "client"
+Requires-Dist: llama-stack-client==0.4.1; extra == "client"
 Dynamic: license-file
 # Llama Stack

{llama_stack-0.4.0 → llama_stack-0.4.1}/src/llama_stack.egg-info/requires.txt RENAMED Viewed

@@ -1,5 +1,5 @@
 PyYAML>=6.0
-aiohttp
+aiohttp>=3.13.3
 fastapi<1.0,>=0.115.0
 fire
 httpx
@@ -26,7 +26,7 @@ sqlalchemy[asyncio]>=2.0.41
 starlette>=0.49.1
 psycopg2-binary
 tornado>=6.5.3
-urllib3>=2.6.0
+urllib3>=2.6.3
 [client]
-llama-stack-client==0.4.0
+llama-stack-client==0.4.1

llama_stack-0.4.0/src/llama_stack_api/common/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.