llama-stack 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llama_stack/core/stack.py CHANGED
@@ -110,6 +110,18 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
 REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
 
+# ID fields for registered resources that should trigger skipping
+# when they resolve to empty/None (from conditional env vars like :+)
+RESOURCE_ID_FIELDS = [
+    "vector_store_id",
+    "model_id",
+    "shield_id",
+    "dataset_id",
+    "scoring_fn_id",
+    "benchmark_id",
+    "toolgroup_id",
+]
+
 
 def is_request_model(t: Any) -> bool:
     """Check if a type is a request model (Pydantic BaseModel).
@@ -346,15 +358,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                    logger.debug(
                        f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
                    )
-                    # Create a copy with resolved provider_id but original config
-                    disabled_provider = v.copy()
-                    disabled_provider["provider_id"] = resolved_provider_id
                    continue
            except EnvVarError:
                # If we can't resolve the provider_id, continue with normal processing
                pass
 
-            # Normal processing for non-disabled providers
+            # Special handling for registered resources: check if ID field resolves to empty/None
+            # from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
+            if isinstance(v, dict):
+                should_skip = False
+                for id_field in RESOURCE_ID_FIELDS:
+                    if id_field in v:
+                        try:
+                            resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
+                            if resolved_id is None or resolved_id == "":
+                                logger.debug(
+                                    f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
+                                )
+                                should_skip = True
+                                break
+                        except EnvVarError as e:
+                            logger.warning(
+                                f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
+                            )
+                if should_skip:
+                    continue
+
+            # Normal processing
            result.append(replace_env_vars(v, f"{path}[{i}]"))
        except EnvVarError as e:
            raise EnvVarError(e.var_name, e.path) from None
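Net effect of these two stack.py hunks: a registered-resource entry (model, shield, vector store, etc.) is now dropped from the run config when its ID field uses a conditional substitution such as `${env.VAR:+value}` and the variable is unset, rather than registering a resource with an empty ID. A minimal sketch of that behavior, with a hypothetical `resolve()` standing in for `replace_env_vars` and handling only the `:+` form:

```python
import os
import re

RESOURCE_ID_FIELDS = ["vector_store_id", "model_id", "shield_id"]

def resolve(value: str) -> str:
    """Hypothetical stand-in for replace_env_vars; handles only ${env.VAR:+alt}."""
    m = re.fullmatch(r"\$\{env\.(\w+):\+(.*)\}", value)
    if m:
        var, alt = m.groups()
        return alt if os.environ.get(var) else ""
    return value

def filter_resources(entries: list[dict]) -> list[dict]:
    kept = []
    for entry in entries:
        # Skip the whole entry when any known ID field resolves to empty/None
        if any(f in entry and not resolve(entry[f]) for f in RESOURCE_ID_FIELDS):
            continue
        kept.append(entry)
    return kept

models = [{"model_id": "${env.SAFETY_MODEL:+llama-guard}", "provider_id": "ollama"}]
print(filter_resources(models))  # [] unless SAFETY_MODEL is set in the environment
```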
llama_stack/providers/inline/vector_io/faiss/faiss.py CHANGED
@@ -18,6 +18,7 @@ from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -72,9 +73,11 @@ class FaissIndex(EmbeddingIndex):
 
        if stored_data:
            data = json.loads(stored_data)
-            self.chunk_by_index = {
-                int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
-            }
+            self.chunk_by_index = {}
+            for k, v in data["chunk_by_index"].items():
+                chunk_data = json.loads(v)
+                # Use generic backward compatibility utility
+                self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
 
            buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
            try:
llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py CHANGED
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import asyncio
+import json
 import re
 import sqlite3
 import struct
@@ -23,6 +24,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,
@@ -235,7 +237,8 @@ class SQLiteVecIndex(EmbeddingIndex):
                if score < score_threshold:
                    continue
                try:
-                    embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                    chunk_data = json.loads(chunk_json)
+                    embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
                except Exception as e:
                    logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                    continue
@@ -276,7 +279,8 @@ class SQLiteVecIndex(EmbeddingIndex):
                if score > -score_threshold:
                    continue
                try:
-                    embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
+                    chunk_data = json.loads(chunk_json)
+                    embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
                except Exception as e:
                    logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                    continue
llama_stack/providers/registry/agents.py CHANGED
@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
            provider_type="inline::meta-reference",
            pip_packages=[
                "matplotlib",
+                "fonttools>=4.60.2",
                "pillow",
                "pandas",
                "scikit-learn",
llama_stack/providers/remote/vector_io/chroma/chroma.py CHANGED
@@ -17,6 +17,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 from llama_stack_api import (
     EmbeddedChunk,
@@ -60,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
    async def initialize(self):
        pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
+
+        # Extract embeddings directly from chunks (already list[float])
+        embeddings = [chunk.embedding for chunk in chunks]
 
        ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
        await maybe_await(
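This signature change repeats across every vector-io provider below (Chroma, Milvus, PGVector, Qdrant, Weaviate): the separate `embeddings: NDArray` argument is gone and each `EmbeddedChunk` carries its own `embedding` as a plain `list[float]`. A rough sketch of the call-site difference, using a simplified stand-in model (field set inferred from the hunks in this diff):

```python
from pydantic import BaseModel

class EmbeddedChunk(BaseModel):
    """Simplified stand-in for llama_stack_api.EmbeddedChunk."""
    chunk_id: str
    content: str
    embedding: list[float]
    embedding_model: str
    embedding_dimension: int
    chunk_metadata: dict = {}

chunks = [
    EmbeddedChunk(
        chunk_id="c1",
        content="hello",
        embedding=[0.1, 0.2],
        embedding_model="all-MiniLM-L6-v2",  # illustrative model name
        embedding_dimension=2,
    )
]

# 0.4.0: parallel lists that the removed asserts had to keep in sync
# await index.add_chunks(chunks, embeddings)

# 0.4.1: embeddings travel with the chunks; each index derives what it
# needs, e.g. ChromaIndex now does the equivalent of:
embeddings = [chunk.embedding for chunk in chunks]
```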
@@ -84,7 +87,7 @@
        for dist, doc in zip(distances, documents, strict=False):
            try:
                doc = json.loads(doc)
-                chunk = EmbeddedChunk(**doc)
+                chunk = load_embedded_chunk_with_backward_compat(doc)
            except Exception:
                log.exception(f"Failed to parse document: {doc}")
                continue
@@ -139,7 +142,7 @@
 
        for dist, doc in zip(distances, documents, strict=False):
            doc_data = json.loads(doc)
-            chunk = EmbeddedChunk(**doc_data)
+            chunk = load_embedded_chunk_with_backward_compat(doc_data)
 
            score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
 
llama_stack/providers/remote/vector_io/milvus/milvus.py CHANGED
@@ -21,7 +21,10 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
-from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -39,6 +42,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
 
 logger = get_logger(name=__name__, category="vector_io::milvus")
 
+
 VERSION = "v3"
 VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
 VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
@@ -65,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
        if await asyncio.to_thread(self.client.has_collection, self.collection_name):
            await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
        if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
            logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
@@ -81,7 +84,7 @@
                max_length=65535,
                enable_analyzer=True,  # Enable text analysis for BM25
            )
-            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
+            schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
            schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
            # Add sparse vector field for BM25 (required by the function)
            schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
@@ -110,12 +113,12 @@
            )
 
        data = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
            data.append(
                {
                    "chunk_id": chunk.chunk_id,
                    "content": chunk.content,
-                    "vector": embedding,
+                    "vector": chunk.embedding,  # Already a list[float]
                    "chunk_content": chunk.model_dump(),
                    # sparse field will be handled by BM25 function automatically
                }
@@ -136,7 +139,7 @@
            output_fields=["*"],
            search_params={"params": {"radius": score_threshold}},
        )
-        chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
+        chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
        scores = [res["distance"] for res in search_res[0]]
        return QueryChunksResponse(chunks=chunks, scores=scores)
 
@@ -163,7 +166,7 @@
        chunks = []
        scores = []
        for res in search_res[0]:
-            chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+            chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
            chunks.append(chunk)
            scores.append(res["distance"])  # BM25 score from Milvus
 
@@ -191,7 +194,7 @@
            output_fields=["*"],
            limit=k,
        )
-        chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
+        chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
        scores = [1.0] * len(chunks)  # Simple binary score for text search
        return QueryChunksResponse(chunks=chunks, scores=scores)
 
@@ -243,7 +246,7 @@
        chunks = []
        scores = []
        for res in search_res[0]:
-            chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
+            chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
            chunks.append(chunk)
            scores.append(res["distance"])
 
llama_stack/providers/remote/vector_io/pgvector/pgvector.py CHANGED
@@ -18,7 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
-from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
+from llama_stack.providers.utils.vector_io.vector_utils import (
+    WeightedInMemoryAggregator,
+    load_embedded_chunk_with_backward_compat,
+    sanitize_collection_name,
+)
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -130,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
            log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
            raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
        values = []
-        for i, chunk in enumerate(chunks):
+        for chunk in chunks:
            content_text = interleaved_content_as_str(chunk.content)
            values.append(
                (
                    f"{chunk.chunk_id}",
                    Json(chunk.model_dump()),
-                    embeddings[i].tolist(),
+                    chunk.embedding,  # Already a list[float]
                    content_text,
                    content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                )
@@ -194,7 +197,7 @@
            score = 1.0 / float(dist) if dist != 0 else float("inf")
            if score < score_threshold:
                continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
            scores.append(score)
 
        return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -230,7 +233,7 @@
        for doc, score in results:
            if score < score_threshold:
                continue
-            chunks.append(EmbeddedChunk(**doc))
+            chunks.append(load_embedded_chunk_with_backward_compat(doc))
            scores.append(float(score))
 
        return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -306,7 +309,8 @@
        """Remove a chunk from the PostgreSQL table."""
        chunk_ids = [c.chunk_id for c in chunks_for_deletion]
        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids))
+            # Fix: Use proper tuple parameter binding with explicit array cast
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
 
    def get_pgvector_search_function(self) -> str:
        return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
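The deletion fix above is subtle enough to spell out: in 0.4.0 the second argument `(chunk_ids)` is not a tuple (parentheses without a trailing comma are just grouping), so psycopg2 took the list itself as the parameter sequence, mismatching the single `%s` whenever more than one id was deleted. A sketch of the corrected pattern, under an assumed `chunks` table and connection:

```python
import psycopg2

conn = psycopg2.connect("dbname=example")  # assumed DSN for illustration
chunk_ids = ["chunk-1", "chunk-2"]

with conn.cursor() as cur:
    # Broken (0.4.0): (chunk_ids) == chunk_ids, so each id became its own
    # parameter and a multi-id delete mismatched the single %s placeholder.
    # cur.execute("DELETE FROM chunks WHERE id = ANY(%s)", (chunk_ids))

    # Fixed (0.4.1): a real one-element tuple, plus an explicit text[] cast
    # so Postgres compares ids against the adapted list as an array.
    cur.execute("DELETE FROM chunks WHERE id = ANY(%s::text[])", (chunk_ids,))
conn.commit()
```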
llama_stack/providers/remote/vector_io/qdrant/qdrant.py CHANGED
@@ -18,6 +18,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
 from llama_stack_api import (
     EmbeddedChunk,
     Files,
@@ -66,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
        # If the collection does not exist, it will be created in add_chunks.
        pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
        if not await self.client.collection_exists(self.collection_name):
            await self.client.create_collection(
                self.collection_name,
-                vectors_config=models.VectorParams(size=len(embeddings[0]), distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
            )
 
        points = []
-        for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
+        for chunk in chunks:
            chunk_id = chunk.chunk_id
            points.append(
                PointStruct(
                    id=convert_id(chunk_id),
-                    vector=embedding,
+                    vector=chunk.embedding,  # Already a list[float]
                    payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
                )
            )
@@ -118,7 +118,7 @@
            assert point.payload is not None
 
            try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
            except Exception:
                log.exception("Failed to parse chunk")
                continue
@@ -172,7 +172,7 @@
                raise RuntimeError("Qdrant query returned point with no payload")
 
            try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
            except Exception:
                chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                point_id = getattr(point, "id", "unknown")
@@ -242,7 +242,7 @@
                raise RuntimeError("Qdrant query returned point with no payload")
 
            try:
-                chunk = EmbeddedChunk(**point.payload["chunk_content"])
+                chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
            except Exception:
                chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
                point_id = getattr(point, "id", "unknown")
llama_stack/providers/remote/vector_io/weaviate/weaviate.py CHANGED
@@ -22,6 +22,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
 from llama_stack_api import (
     EmbeddedChunk,
@@ -57,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
    async def initialize(self):
        pass
 
-    async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
        data_objects = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
            data_objects.append(
                wvc.data.DataObject(
                    properties={
                        "chunk_id": chunk.chunk_id,
                        "chunk_content": chunk.model_dump_json(),
                    },
-                    vector=embedding.tolist(),
+                    vector=chunk.embedding,  # Already a list[float]
                )
            )
 
@@ -116,7 +116,7 @@
                chunk_json = doc.properties["chunk_content"]
                try:
                    chunk_dict = json.loads(chunk_json)
-                    chunk = EmbeddedChunk(**chunk_dict)
+                    chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
                except Exception:
                    log.exception(f"Failed to parse document: {chunk_json}")
                    continue
@@ -176,7 +176,7 @@
                chunk_json = doc.properties["chunk_content"]
                try:
                    chunk_dict = json.loads(chunk_json)
-                    chunk = EmbeddedChunk(**chunk_dict)
+                    chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
                except Exception:
                    log.exception(f"Failed to parse document: {chunk_json}")
                    continue
@@ -245,7 +245,7 @@
                chunk_json = doc.properties["chunk_content"]
                try:
                    chunk_dict = json.loads(chunk_json)
-                    chunk = EmbeddedChunk(**chunk_dict)
+                    chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
                except Exception:
                    log.exception(f"Failed to parse document: {chunk_json}")
                    continue
llama_stack/providers/utils/vector_io/__init__.py CHANGED
@@ -3,3 +3,19 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]
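With these re-exports in place, providers can import the helper from the package root, as the faiss, sqlite_vec, chroma, and weaviate hunks above do; the deeper module path used by the milvus, pgvector, and qdrant hunks keeps working as well:

```python
# Package-root import enabled by the new __init__.py re-exports:
from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat

# Equivalent deeper path, still valid:
# from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
```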
llama_stack/providers/utils/vector_io/vector_utils.py CHANGED
@@ -7,6 +7,9 @@
 import hashlib
 import re
 import uuid
+from typing import Any
+
+from llama_stack_api import EmbeddedChunk
 
 
 def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
@@ -154,3 +157,36 @@ class WeightedInMemoryAggregator:
        # Default to RRF for None, RRF, or any unknown types
        impact_factor = reranker_params.get("impact_factor", 60.0)
        return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
+
+
+def load_embedded_chunk_with_backward_compat(
+    chunk_data: dict[str, Any],
+) -> EmbeddedChunk:
+    """
+    Load EmbeddedChunk data with backward compatibility for legacy field locations.
+
+    Handles migration from old format where embedding_model and embedding_dimension
+    were stored in chunk_metadata to current top-level format.
+
+    Args:
+        chunk_data: Dictionary containing chunk data to load
+
+    Returns:
+        EmbeddedChunk object with migrated data
+    """
+    # Migrate old data: extract embedding_model/embedding_dimension from chunk_metadata if missing
+    if "embedding_model" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_model"] = chunk_metadata.get("chunk_embedding_model", "unknown")
+
+    if "embedding_dimension" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_dimension"] = chunk_metadata.get(
+            "chunk_embedding_dimension", len(chunk_data.get("embedding", []))
+        )
+
+    # Ensure embedding field exists (required by EmbeddedChunk)
+    if "embedding" not in chunk_data:
+        chunk_data["embedding"] = []
+
+    return EmbeddedChunk(**chunk_data)
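To make the migration concrete, here is how the helper behaves on a legacy record versus a current one (field values invented for illustration; assumes the fields shown suffice to construct an EmbeddedChunk):

```python
legacy = {
    "chunk_id": "c1",
    "content": "hello",
    "embedding": [0.1, 0.2, 0.3],
    # older layout: model/dimension nested under chunk_metadata
    "chunk_metadata": {
        "chunk_embedding_model": "all-MiniLM-L6-v2",
        "chunk_embedding_dimension": 3,
    },
}
current = {
    "chunk_id": "c2",
    "content": "world",
    "embedding": [0.4, 0.5, 0.6],
    "embedding_model": "all-MiniLM-L6-v2",
    "embedding_dimension": 3,
}

# Both load into the same top-level shape:
for data in (legacy, current):
    chunk = load_embedded_chunk_with_backward_compat(data)
    print(chunk.embedding_model, chunk.embedding_dimension)
```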
llama_stack-0.4.0.dist-info/METADATA → llama_stack-0.4.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.0
+Version: 0.4.1
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -17,7 +17,7 @@ Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: PyYAML>=6.0
-Requires-Dist: aiohttp
+Requires-Dist: aiohttp>=3.13.3
 Requires-Dist: fastapi<1.0,>=0.115.0
 Requires-Dist: fire
 Requires-Dist: httpx
@@ -44,9 +44,9 @@ Requires-Dist: sqlalchemy[asyncio]>=2.0.41
 Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
-Requires-Dist: urllib3>=2.6.0
+Requires-Dist: urllib3>=2.6.3
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.0; extra == "client"
+Requires-Dist: llama-stack-client==0.4.1; extra == "client"
 Dynamic: license-file
 
 # Llama Stack
llama_stack-0.4.0.dist-info/RECORD → llama_stack-0.4.1.dist-info/RECORD CHANGED
@@ -34,7 +34,7 @@ llama_stack/core/library_client.py,sha256=V5f7apz0heD5DyExwNXiEN0E5xGyQh279BeuVS
 llama_stack/core/providers.py,sha256=EblMlsWJKGHsXCTmVo-doCJ64JEpBy7-2DoupFkaTUo,5134
 llama_stack/core/request_headers.py,sha256=tUt-RvzUrl7yxbYKBe7nN5YBCgWxShz4cemLvl7XGxc,3692
 llama_stack/core/resolver.py,sha256=IRPPwi60uAe5mlj-NjAR41laP9Dp1WvAI3A-bTMB-mk,19383
-llama_stack/core/stack.py,sha256=dIchLUnW8gf-DVwNLhds5HbyWdiaA_Xx0SmE8XRb-DY,25973
+llama_stack/core/stack.py,sha256=06pMa8FuPC8znOpSuteF80lat8f4UsvUgloa3_d64XI,27218
 llama_stack/core/start_stack.sh,sha256=3snlFzur13NS1_UnJQ6t8zK7R5DCRFJKJrz9YTJmWVA,2834
 llama_stack/core/testing_context.py,sha256=TIWetol6Sb2BSiqkq5X0knb0chG03GSpmjByFwVfY60,1438
 llama_stack/core/access_control/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
@@ -317,16 +317,16 @@ llama_stack/providers/inline/vector_io/chroma/__init__.py,sha256=gWJ-VCpFHyqmZop
 llama_stack/providers/inline/vector_io/chroma/config.py,sha256=T3dM9KqN280F9McGoIEonzfoLl3cTnJxUwH4nLq14no,925
 llama_stack/providers/inline/vector_io/faiss/__init__.py,sha256=PLWOnPuME5oEn4vEtrTeARASgTlbz_M3tUltVCuXitQ,646
 llama_stack/providers/inline/vector_io/faiss/config.py,sha256=CSXWsVXp3-HpL9p9XmsGYlPWyyip8qzJppspkuYEF04,762
-llama_stack/providers/inline/vector_io/faiss/faiss.py,sha256=2-J7r_ox6wlZfyD-jt6icIyu15pShuKKNcoiI1av5GY,12620
+llama_stack/providers/inline/vector_io/faiss/faiss.py,sha256=h586FCWDh6Hr4dJjOUHiJZd3s-_d_Rfksk_0cwsoCVE,12853
 llama_stack/providers/inline/vector_io/milvus/__init__.py,sha256=AeZb3UEGVLZkiX6VKrZkz4F9TWsID5Es9BdqK_cD2Io,601
 llama_stack/providers/inline/vector_io/milvus/config.py,sha256=Tw8kgRV3CRaboMxIz0QqlvmN9d_eFLR8Cjzn7YRnPmo,1060
 llama_stack/providers/inline/vector_io/qdrant/__init__.py,sha256=bGzxOieUgY3AaB7QxR_otjbFmCvdEmpit8Tu8CDp-WM,696
 llama_stack/providers/inline/vector_io/qdrant/config.py,sha256=8Bav7OeCJRETEq1Cftlty-P5i0mvqcd6JDYRSGKUT1Y,855
 llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py,sha256=dmkENtICtfyYuWRN75Kz-scWkEGsmv3gaxEL2HagYb4,661
 llama_stack/providers/inline/vector_io/sqlite_vec/config.py,sha256=zwHR-7oXMSbTnXeO2pJ1BmXWGL9NBkQ76A5Wb3fjqZQ,937
-llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py,sha256=i6rWeZqOI_vg8W9v8_fOVOIMuQ1Hs7BsH2F2n6xyG8U,20664
+llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py,sha256=aFHGPxoBF3CkjSNXhYFcO4MFoZEP2frmcHX1wCs4j94,20885
 llama_stack/providers/registry/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/registry/agents.py,sha256=yK3DBcBga1blgkdTtKmUPvHQyUrjvrwcgPCt7I7-_jQ,1509
+llama_stack/providers/registry/agents.py,sha256=3wgdIFipxrstkGPxgpR2LoshNGa--_pe0MRLu6MLakQ,1546
 llama_stack/providers/registry/batches.py,sha256=YXUNlKbpWk3JJhO69nU3WHVNm9JRmyHTzbiDoET_3fw,885
 llama_stack/providers/registry/datasetio.py,sha256=d2eJsjkACtMEpYd3OOKC32fIcQ5a-3H0MGCxTZyk63o,1886
 llama_stack/providers/registry/eval.py,sha256=RQR6eq3G-JSIiMJFS_mlNmC6m-KQRGuteshhyEG6e1Q,1732
@@ -457,20 +457,20 @@ llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py,sha256=RK12PdV
 llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py,sha256=AdHGz0cIpbou8_o0Hz5gKgAza1JctoEXpVHrD-QxPWc,5198
 llama_stack/providers/remote/vector_io/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/remote/vector_io/chroma/__init__.py,sha256=OgzNrPHV3mT7ze6QEMJ-uJSldaqKFzeKT3S-rHz26J4,554
-llama_stack/providers/remote/vector_io/chroma/chroma.py,sha256=4dX77W7ppos7WOHk3vVhE8DM1dIDas2gP6auWF_LFdI,12581
+llama_stack/providers/remote/vector_io/chroma/chroma.py,sha256=aboi3svqfUigfIhQHAl7QnGjZR-nlhgkc1sL5OS7rY4,12724
 llama_stack/providers/remote/vector_io/chroma/config.py,sha256=Wzz7KsTv4eQkE0KLfq_APxendaoPli6cSzLkSb-c_1Y,908
 llama_stack/providers/remote/vector_io/milvus/__init__.py,sha256=k2E3oS_HU9BFMnyNkhMtutxjfgw3f3In1-pm2AmgeZ8,649
 llama_stack/providers/remote/vector_io/milvus/config.py,sha256=ZRf00ifVSEC5te8WwAv2RtAYo6ar1UdpHxlZIC66unc,1456
-llama_stack/providers/remote/vector_io/milvus/milvus.py,sha256=dSRIebdYsqP4hSH-1qbJCIqQDFmBeagclZ8abCuAnM0,16385
+llama_stack/providers/remote/vector_io/milvus/milvus.py,sha256=epkA43GUBu6u5-uGw1Dk-MNcETTjve4GCGo52p46wnY,16406
 llama_stack/providers/remote/vector_io/pgvector/__init__.py,sha256=yAFgSkT7gwkRtO-xWtm4nkP99QQxZAxsynDUEybIf9Y,564
 llama_stack/providers/remote/vector_io/pgvector/config.py,sha256=yMVFQf4fJDev8ShnbnOicXnLF9r45MIxCKYcbmDFyfw,1540
-llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=_Uz6DjuaHMZmYzHAJI4f7PCfd5PKOmWnbESRQXAB2_o,19530
+llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=UwGoaSFgPOJGp6jdWdMcB5ZSGpeP7RP13TTUfk1wD2U,19601
 llama_stack/providers/remote/vector_io/qdrant/__init__.py,sha256=Tim4k1BhOROl5BCHuS1YzH6_MilgNdNrv1qe4zFcla4,554
 llama_stack/providers/remote/vector_io/qdrant/config.py,sha256=4akCc4YbYYIBCENj8NRUm3OI-ix0zTOmbgq8RG3nIWU,1115
-llama_stack/providers/remote/vector_io/qdrant/qdrant.py,sha256=XY-wOE0d4l1QNhtiwXRBw0FS0HUx1o9j8ZqnI8i1j1E,15377
+llama_stack/providers/remote/vector_io/qdrant/qdrant.py,sha256=VlA-y7F52LC4paHEV6BRQyxWAdBBzh0gWH1hUUs7JMQ,15404
 llama_stack/providers/remote/vector_io/weaviate/__init__.py,sha256=2lqJMJK8Fw6p_S9kRX088SVqvnC1ctWrSf_niPNgYm8,564
 llama_stack/providers/remote/vector_io/weaviate/config.py,sha256=4hlK60GLZxDbevGasw2JipsEh2a-afdBohkB5MsIeBw,1231
-llama_stack/providers/remote/vector_io/weaviate/weaviate.py,sha256=wBG7Whva5Kzb_R7cXHrPNT7VxwabshBERE0-KJX7VLQ,17257
+llama_stack/providers/remote/vector_io/weaviate/weaviate.py,sha256=NrPwEgG9fDyM6u-0XoaBGDDQ0lq_R6FOzGXBpNqSB10,17273
 llama_stack/providers/utils/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/pagination.py,sha256=I0OgIaavpfxIQajYKVmgbSsT9Q5LjvWDfAOSjjb-Yow,1391
 llama_stack/providers/utils/scheduler.py,sha256=kP6lR0KPsYnd_mtwInuqegNqTSylZ09WiDQfiDyaX2k,8473
@@ -506,14 +506,14 @@ llama_stack/providers/utils/scoring/basic_scoring_utils.py,sha256=JmGA65N55raHR7
 llama_stack/providers/utils/tools/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/tools/mcp.py,sha256=zjGAkmPRRcNALfp9uWaTNKl0WRaCTf7jrASY-Ka1-SA,9315
 llama_stack/providers/utils/tools/ttl_dict.py,sha256=4Bv3Nri9HM2FSckfaJJbqzICpO2S_yOXcsgVj_yvsoA,2021
-llama_stack/providers/utils/vector_io/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/utils/vector_io/vector_utils.py,sha256=ukOffg0ptQkWP75Ai-4wO_hxVHsdXOVEtGmX1KUEYFg,5856
+llama_stack/providers/utils/vector_io/__init__.py,sha256=fGP7xUTCZ3E77v3FtEuGyW2k3S5Tn9x0Kk1aEIafoxA,552
+llama_stack/providers/utils/vector_io/vector_utils.py,sha256=l1asZcxbtlRIaZUi_LbXagclCAveD-k6w28LfOZwqBk,7147
 llama_stack/telemetry/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/telemetry/constants.py,sha256=LtXE61xwNL3cBYZXKcXcbwD_Uh1jazP3V8a0odWBbAs,1118
 llama_stack/telemetry/helpers.py,sha256=7uarMIHL5ngOUXQZxkH96corFxE7Jk5JaizRQ8Z8Ok0,1694
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=oGGTrzzBYNNvOIcvcFZenNPthr0yziJ7hlGPtckx460,39240
-llama_stack-0.4.0.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.4.1.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
 llama_stack_api/__init__.py,sha256=5XNQGpundjXTutLgnYp6B1t6KITWXH_of626GciNma4,28103
 llama_stack_api/agents.py,sha256=u0sg3AoWCip5o8T4DMTM8uqP3BsdbkKbor3PmxKTg0g,7143
 llama_stack_api/connectors.py,sha256=PcAwndbVQC6pm5HGSlNprqYFTZzhCM7SYHPyRkSIoaQ,4644
@@ -581,8 +581,8 @@ llama_stack_api/providers/__init__.py,sha256=a_187ghsdPNYJ5xLizqKYREJJLBa-lpcIhL
 llama_stack_api/providers/api.py,sha256=ytwxri9s6p8j9ClFKgN9mfa1TF0VZh1o8W5cVZR49rc,534
 llama_stack_api/providers/fastapi_routes.py,sha256=jb1yrXEk1MdtcgWCToSZtaB-wjKqv5uVKIkvduXoKlM,1962
 llama_stack_api/providers/models.py,sha256=nqBzh9je_dou35XFjYGD43hwKgjWy6HIRmGWUrcGqOw,653
-llama_stack-0.4.0.dist-info/METADATA,sha256=UlhgLD3RZAM1vBEIoqlzPPX62IIfe3H06E9AcH7AdKg,12456
-llama_stack-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llama_stack-0.4.0.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
-llama_stack-0.4.0.dist-info/top_level.txt,sha256=pyNYneZU5w62BaExic-GC1ph5kk8JI2mJFwzqiZy2cU,28
-llama_stack-0.4.0.dist-info/RECORD,,
+llama_stack-0.4.1.dist-info/METADATA,sha256=NKwt6q3Lm1sKiw_E4IDkaFK_K7OyGaIJss12ghzGjBM,12464
+llama_stack-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llama_stack-0.4.1.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.4.1.dist-info/top_level.txt,sha256=pyNYneZU5w62BaExic-GC1ph5kk8JI2mJFwzqiZy2cU,28
+llama_stack-0.4.1.dist-info/RECORD,,