llama-stack 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. llama_stack/core/library_client.py +80 -3
  2. llama_stack/core/routing_tables/common.py +11 -0
  3. llama_stack/core/routing_tables/vector_stores.py +4 -0
  4. llama_stack/core/stack.py +16 -1
  5. llama_stack/core/storage/kvstore/kvstore.py +11 -0
  6. llama_stack/core/storage/kvstore/mongodb/mongodb.py +5 -0
  7. llama_stack/core/storage/kvstore/postgres/postgres.py +8 -0
  8. llama_stack/core/storage/kvstore/redis/redis.py +5 -0
  9. llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py +8 -0
  10. llama_stack/core/storage/sqlstore/sqlstore.py +8 -0
  11. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +60 -34
  12. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +4 -0
  13. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +9 -1
  14. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +13 -1
  15. llama_stack/providers/utils/inference/embedding_mixin.py +20 -16
  16. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +33 -0
  17. llama_stack/providers/utils/tools/mcp.py +258 -16
  18. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/METADATA +2 -2
  19. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/RECORD +29 -29
  20. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/WHEEL +1 -1
  21. llama_stack_api/internal/kvstore.py +2 -0
  22. llama_stack_api/internal/sqlstore.py +2 -0
  23. llama_stack_api/llama_stack_api/internal/kvstore.py +2 -0
  24. llama_stack_api/llama_stack_api/internal/sqlstore.py +2 -0
  25. llama_stack_api/llama_stack_api/vector_stores.py +2 -0
  26. llama_stack_api/vector_stores.py +2 -0
  27. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/entry_points.txt +0 -0
  28. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/licenses/LICENSE +0 -0
  29. {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/top_level.txt +0 -0
llama_stack/core/library_client.py CHANGED
@@ -161,6 +161,45 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         """
         pass

+    def shutdown(self) -> None:
+        """Shutdown the client and release all resources.
+
+        This method should be called when you're done using the client to properly
+        close database connections and release other resources. Failure to call this
+        method may result in the program hanging on exit while waiting for background
+        threads to complete.
+
+        This method is idempotent and can be called multiple times safely.
+
+        Example:
+            client = LlamaStackAsLibraryClient("starter")
+            # ... use the client ...
+            client.shutdown()
+        """
+        loop = self.loop
+        asyncio.set_event_loop(loop)
+        try:
+            loop.run_until_complete(self.async_client.shutdown())
+        finally:
+            loop.close()
+            asyncio.set_event_loop(None)
+
+    def __enter__(self) -> "LlamaStackAsLibraryClient":
+        """Enter the context manager.
+
+        The client is already initialized in __init__, so this just returns self.
+
+        Example:
+            with LlamaStackAsLibraryClient("starter") as client:
+                response = client.models.list()
+            # Client is automatically shut down here
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the context manager and shut down the client."""
+        self.shutdown()
+
     def request(self, *args, **kwargs):
         loop = self.loop
         asyncio.set_event_loop(loop)
@@ -224,6 +263,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         self.custom_provider_registry = custom_provider_registry
         self.provider_data = provider_data
         self.route_impls: RouteImpls | None = None  # Initialize to None to prevent AttributeError
+        self.stack: Stack | None = None

     def _remove_root_logger_handlers(self):
         """
@@ -246,9 +286,9 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         try:
             self.route_impls = None

-            stack = Stack(self.config, self.custom_provider_registry)
-            await stack.initialize()
-            self.impls = stack.impls
+            self.stack = Stack(self.config, self.custom_provider_registry)
+            await self.stack.initialize()
+            self.impls = self.stack.impls
         except ModuleNotFoundError as _e:
             cprint(_e.msg, color="red", file=sys.stderr)
             cprint(
@@ -283,6 +323,43 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         self.route_impls = initialize_route_impls(self.impls)
         return True

+    async def shutdown(self) -> None:
+        """Shutdown the client and release all resources.
+
+        This method should be called when you're done using the client to properly
+        close database connections and release other resources. Failure to call this
+        method may result in the program hanging on exit while waiting for background
+        threads to complete.
+
+        This method is idempotent and can be called multiple times safely.
+
+        Example:
+            client = AsyncLlamaStackAsLibraryClient("starter")
+            await client.initialize()
+            # ... use the client ...
+            await client.shutdown()
+        """
+        if self.stack:
+            await self.stack.shutdown()
+            self.stack = None
+
+    async def __aenter__(self) -> "AsyncLlamaStackAsLibraryClient":
+        """Enter the async context manager.
+
+        Initializes the client and returns it.
+
+        Example:
+            async with AsyncLlamaStackAsLibraryClient("starter") as client:
+                response = await client.models.list()
+            # Client is automatically shut down here
+        """
+        await self.initialize()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the async context manager and shut down the client."""
+        await self.shutdown()
+
     async def request(
         self,
         cast_to: Any,
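Taken together, the library_client.py changes mean both clients can now be used as context managers, which guarantees the new shutdown() path (and the storage-backend cleanup introduced further below) actually runs. A minimal usage sketch, assuming the "starter" distribution from the docstrings above is available locally:

    import asyncio

    from llama_stack.core.library_client import (
        AsyncLlamaStackAsLibraryClient,
        LlamaStackAsLibraryClient,
    )

    # Sync client: __exit__ calls shutdown(), which drives the loop to completion and closes it.
    with LlamaStackAsLibraryClient("starter") as client:
        print(client.models.list())

    # Async client: __aenter__ runs initialize(), __aexit__ awaits shutdown().
    async def main() -> None:
        async with AsyncLlamaStackAsLibraryClient("starter") as client:
            print(await client.models.list())

    asyncio.run(main())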
llama_stack/core/routing_tables/common.py CHANGED
@@ -209,6 +209,17 @@ class CommonRoutingTableImpl(RoutingTable):
             logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

         registered_obj = await register_object_with_provider(obj, p)
+
+        # Ensure OpenAI metadata exists for vector stores
+        if obj.type == ResourceType.vector_store.value:
+            if hasattr(p, "_ensure_openai_metadata_exists"):
+                await p._ensure_openai_metadata_exists(obj)
+            else:
+                logger.warning(
+                    f"Provider {obj.provider_id} does not support OpenAI metadata creation. "
+                    f"Vector store {obj.identifier} may not work with OpenAI-compatible APIs."
+                )
+
         # TODO: This needs to be fixed for all APIs once they return the registered object
         if obj.type == ResourceType.model.value:
             await self.dist_registry.register(registered_obj)
llama_stack/core/routing_tables/vector_stores.py CHANGED
@@ -55,6 +55,10 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):

    # Internal methods only - no public API exposure

+    async def list_vector_stores(self) -> list[VectorStoreWithOwner]:
+        """List all registered vector stores."""
+        return await self.get_all_with_type(ResourceType.vector_store.value)
+
    async def register_vector_store(
        self,
        vector_store_id: str,
llama_stack/core/stack.py CHANGED
@@ -108,6 +108,7 @@ RESOURCES = [
    ),
    ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
    ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
+   ("vector_stores", Api.vector_stores, "register_vector_store", "list_vector_stores", None),
 ]


@@ -620,7 +621,7 @@ class Stack:
     async def shutdown(self):
         for impl in self.impls.values():
             impl_name = impl.__class__.__name__
-            logger.info(f"Shutting down {impl_name}")
+            logger.debug(f"Shutting down {impl_name}")
             try:
                 if hasattr(impl, "shutdown"):
                     await asyncio.wait_for(impl.shutdown(), timeout=5)
@@ -642,6 +643,20 @@
         if REGISTRY_REFRESH_TASK:
             REGISTRY_REFRESH_TASK.cancel()

+        # Shutdown storage backends
+        from llama_stack.core.storage.kvstore.kvstore import shutdown_kvstore_backends
+        from llama_stack.core.storage.sqlstore.sqlstore import shutdown_sqlstore_backends
+
+        try:
+            await shutdown_kvstore_backends()
+        except Exception as e:
+            logger.exception(f"Failed to shutdown KV store backends: {e}")
+
+        try:
+            await shutdown_sqlstore_backends()
+        except Exception as e:
+            logger.exception(f"Failed to shutdown SQL store backends: {e}")
+

 async def refresh_registry_once(impls: dict[Api, Any]):
     logger.debug("refreshing registry")
llama_stack/core/storage/kvstore/kvstore.py CHANGED
@@ -62,6 +62,9 @@ class InmemoryKVStoreImpl(KVStore):
     async def delete(self, key: str) -> None:
         del self._store[key]

+    async def shutdown(self) -> None:
+        self._store.clear()
+

 _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
 _KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
@@ -126,3 +129,11 @@ async def kvstore_impl(reference: KVStoreReference) -> KVStore:
     await impl.initialize()
     _KVSTORE_INSTANCES[cache_key] = impl
     return impl
+
+
+async def shutdown_kvstore_backends() -> None:
+    """Shutdown all cached KV store instances."""
+    global _KVSTORE_INSTANCES
+    for instance in _KVSTORE_INSTANCES.values():
+        await instance.shutdown()
+    _KVSTORE_INSTANCES.clear()
llama_stack/core/storage/kvstore/mongodb/mongodb.py CHANGED
@@ -83,3 +83,8 @@ class MongoDBKVStoreImpl(KVStore):
         async for doc in cursor:
             result.append(doc["key"])
         return result
+
+    async def shutdown(self) -> None:
+        if self.conn:
+            await self.conn.close()
+            self.conn = None
llama_stack/core/storage/kvstore/postgres/postgres.py CHANGED
@@ -123,3 +123,11 @@ class PostgresKVStoreImpl(KVStore):
             (start_key, end_key),
         )
         return [row[0] for row in cursor.fetchall()]
+
+    async def shutdown(self) -> None:
+        if self._cursor:
+            self._cursor.close()
+            self._cursor = None
+        if self._conn:
+            self._conn.close()
+            self._conn = None
llama_stack/core/storage/kvstore/redis/redis.py CHANGED
@@ -99,3 +99,8 @@ class RedisKVStoreImpl(KVStore):
             if cursor == 0:
                 break
         return result
+
+    async def shutdown(self) -> None:
+        if self._redis:
+            await self._redis.close()
+            self._redis = None
llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py CHANGED
@@ -107,6 +107,14 @@ class SqlAlchemySqlStoreImpl(SqlStore):

         return engine

+    async def shutdown(self) -> None:
+        """Dispose the session maker's engine and close all connections."""
+        # The async_session holds a reference to the engine created in __init__
+        if self.async_session:
+            engine = self.async_session.kw.get("bind")
+            if engine:
+                await engine.dispose()
+
     async def create_table(
         self,
         table: str,
llama_stack/core/storage/sqlstore/sqlstore.py CHANGED
@@ -85,3 +85,11 @@ def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> Non
     _SQLSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _SQLSTORE_BACKENDS[name] = cfg
+
+
+async def shutdown_sqlstore_backends() -> None:
+    """Shutdown all cached SQL store instances."""
+    global _SQLSTORE_INSTANCES
+    for instance in _SQLSTORE_INSTANCES.values():
+        await instance.shutdown()
+    _SQLSTORE_INSTANCES.clear()
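Stack.shutdown() (see the stack.py hunk above) is the normal caller of these two new helpers. The sketch below is a hedged illustration, not an additional API: the same teardown invoked directly, e.g. from a test fixture that built stores through the cached backends without going through Stack:

    from llama_stack.core.storage.kvstore.kvstore import shutdown_kvstore_backends
    from llama_stack.core.storage.sqlstore.sqlstore import shutdown_sqlstore_backends

    async def teardown_storage() -> None:
        # Closes every cached KVStore instance (in-memory, Redis, Postgres, MongoDB, ...)
        await shutdown_kvstore_backends()
        # Disposes every cached SQL store's SQLAlchemy engine
        await shutdown_sqlstore_backends()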
llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py CHANGED
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import re
 import time
 import uuid
@@ -16,6 +17,7 @@ from llama_stack.providers.utils.responses.responses_store import (
     ResponsesStore,
     _OpenAIResponseObjectWithInputAndMessages,
 )
+from llama_stack.providers.utils.tools.mcp import MCPSessionManager
 from llama_stack_api import (
     ConversationItem,
     Conversations,
@@ -489,6 +491,19 @@ class OpenAIResponsesImpl:
         response_id = f"resp_{uuid.uuid4()}"
         created_at = int(time.time())

+        # Create a per-request MCP session manager for session reuse (fix for #4452)
+        # This avoids redundant tools/list calls when making multiple MCP tool invocations
+        mcp_session_manager = MCPSessionManager()
+
+        # Create a per-request ToolExecutor with the session manager
+        request_tool_executor = ToolExecutor(
+            tool_groups_api=self.tool_groups_api,
+            tool_runtime_api=self.tool_runtime_api,
+            vector_io_api=self.vector_io_api,
+            vector_stores_config=self.tool_executor.vector_stores_config,
+            mcp_session_manager=mcp_session_manager,
+        )
+
         orchestrator = StreamingResponseOrchestrator(
             inference_api=self.inference_api,
             ctx=ctx,
@@ -498,7 +513,7 @@
             text=text,
             max_infer_iters=max_infer_iters,
             parallel_tool_calls=parallel_tool_calls,
-            tool_executor=self.tool_executor,
+            tool_executor=request_tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
             instructions=instructions,
@@ -513,41 +528,52 @@

         # Type as ConversationItem to avoid list invariance issues
         output_items: list[ConversationItem] = []
-        async for stream_chunk in orchestrator.create_response():
-            match stream_chunk.type:
-                case "response.completed" | "response.incomplete":
-                    final_response = stream_chunk.response
-                case "response.failed":
-                    failed_response = stream_chunk.response
-                case "response.output_item.done":
-                    item = stream_chunk.item
-                    output_items.append(item)
-                case _:
-                    pass  # Other event types
-
-            # Store and sync before yielding terminal events
-            # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
-            if (
-                stream_chunk.type in {"response.completed", "response.incomplete"}
-                and final_response
-                and failed_response is None
-            ):
-                messages_to_store = list(
-                    filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
-                )
-                if store:
-                    # TODO: we really should work off of output_items instead of "final_messages"
-                    await self._store_response(
-                        response=final_response,
-                        input=all_input,
-                        messages=messages_to_store,
+        try:
+            async for stream_chunk in orchestrator.create_response():
+                match stream_chunk.type:
+                    case "response.completed" | "response.incomplete":
+                        final_response = stream_chunk.response
+                    case "response.failed":
+                        failed_response = stream_chunk.response
+                    case "response.output_item.done":
+                        item = stream_chunk.item
+                        output_items.append(item)
+                    case _:
+                        pass  # Other event types
+
+                # Store and sync before yielding terminal events
+                # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
+                if (
+                    stream_chunk.type in {"response.completed", "response.incomplete"}
+                    and final_response
+                    and failed_response is None
+                ):
+                    messages_to_store = list(
+                        filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
                     )
+                    if store:
+                        # TODO: we really should work off of output_items instead of "final_messages"
+                        await self._store_response(
+                            response=final_response,
+                            input=all_input,
+                            messages=messages_to_store,
+                        )

-                if conversation:
-                    await self._sync_response_to_conversation(conversation, input, output_items)
-                    await self.responses_store.store_conversation_messages(conversation, messages_to_store)
-
-            yield stream_chunk
+                    if conversation:
+                        await self._sync_response_to_conversation(conversation, input, output_items)
+                        await self.responses_store.store_conversation_messages(conversation, messages_to_store)
+
+                yield stream_chunk
+        finally:
+            # Clean up MCP sessions at the end of the request (fix for #4452)
+            # Use shield() to prevent cancellation from interrupting cleanup and leaking resources
+            # Wrap in try/except as cleanup errors should not mask the original response
+            try:
+                await asyncio.shield(mcp_session_manager.close_all())
+            except BaseException as e:
+                # Debug level - cleanup errors are expected in streaming scenarios where
+                # anyio cancel scopes may be in a different task context
                logger.debug(f"Error during MCP session cleanup: {e}")

     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
llama_stack/providers/inline/agents/meta_reference/responses/streaming.py CHANGED
@@ -1200,6 +1200,9 @@ class StreamingResponseOrchestrator:
             "mcp_list_tools_id": list_id,
         }

+        # Get session manager from tool_executor if available (fix for #4452)
+        session_manager = getattr(self.tool_executor, "mcp_session_manager", None)
+
         # TODO: follow semantic conventions for Open Telemetry tool spans
         # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
         with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
@@ -1207,6 +1210,7 @@
                 endpoint=mcp_tool.server_url,
                 headers=mcp_tool.headers,
                 authorization=mcp_tool.authorization,
+                session_manager=session_manager,
             )

         # Create the MCP list tools message
llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py CHANGED
@@ -54,11 +54,14 @@ class ToolExecutor:
         tool_runtime_api: ToolRuntime,
         vector_io_api: VectorIO,
         vector_stores_config=None,
+        mcp_session_manager=None,
     ):
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.vector_io_api = vector_io_api
         self.vector_stores_config = vector_stores_config
+        # Optional MCPSessionManager for session reuse within a request (fix for #4452)
+        self.mcp_session_manager = mcp_session_manager

     async def execute_tool_call(
         self,
@@ -233,6 +236,7 @@ class ToolExecutor:
                "document_ids": [r.file_id for r in search_results],
                "chunks": [r.content[0].text if r.content else "" for r in search_results],
                "scores": [r.score for r in search_results],
+               "attributes": [r.attributes or {} for r in search_results],
                "citation_files": citation_files,
            },
        )
@@ -327,12 +331,14 @@ class ToolExecutor:
            # TODO: follow semantic conventions for Open Telemetry tool spans
            # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
            with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
+               # Pass session_manager for session reuse within request (fix for #4452)
                result = await invoke_mcp_tool(
                    endpoint=mcp_tool.server_url,
                    tool_name=function_name,
                    kwargs=tool_kwargs,
                    headers=mcp_tool.headers,
                    authorization=mcp_tool.authorization,
+                   session_manager=self.mcp_session_manager,
                )
        elif function_name == "knowledge_search":
            response_file_search_tool = (
@@ -464,16 +470,18 @@ class ToolExecutor:
        )
        if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
            message.results = []
+           attributes_list = metadata.get("attributes", [])
            for i, doc_id in enumerate(metadata["document_ids"]):
                text = metadata["chunks"][i] if "chunks" in metadata else None
                score = metadata["scores"][i] if "scores" in metadata else None
+               attrs = attributes_list[i] if i < len(attributes_list) else {}
                message.results.append(
                    OpenAIResponseOutputMessageFileSearchToolCallResults(
                        file_id=doc_id,
                        filename=doc_id,
                        text=text if text is not None else "",
                        score=score if score is not None else 0.0,
-                       attributes={},
+                       attributes=attrs,
                    )
                )
        if has_error:
llama_stack/providers/remote/vector_io/pgvector/pgvector.py CHANGED
@@ -10,6 +10,7 @@ from typing import Any
 import psycopg2
 from numpy.typing import NDArray
 from psycopg2 import sql
+from psycopg2.extensions import cursor
 from psycopg2.extras import Json, execute_values
 from pydantic import BaseModel, TypeAdapter

@@ -54,6 +55,17 @@ def check_extension_version(cur):
     return result[0] if result else None


+def create_vector_extension(cur: cursor) -> None:
+    try:
+        log.info("Vector extension not found, creating...")
+        cur.execute("CREATE EXTENSION vector;")
+        log.info("Vector extension created successfully")
+        log.info(f"Vector extension version: {check_extension_version(cur)}")
+
+    except psycopg2.Error as e:
+        raise RuntimeError(f"Failed to create vector extension for PGVector: {e}") from e
+
+
 def upsert_models(conn, keys_models: list[tuple[str, BaseModel]]):
     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         query = sql.SQL(
@@ -364,7 +376,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
             if version:
                 log.info(f"Vector extension version: {version}")
             else:
-                raise RuntimeError("Vector extension is not installed.")
+                create_vector_extension(cur)

             cur.execute(
                 """
llama_stack/providers/utils/inference/embedding_mixin.py CHANGED
@@ -25,7 +25,8 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
 )

-EMBEDDING_MODELS = {}
+EMBEDDING_MODELS: dict[str, "SentenceTransformer"] = {}
+EMBEDDING_MODELS_LOCK = asyncio.Lock()

 DARWIN = "Darwin"

@@ -76,26 +77,29 @@ class SentenceTransformerEmbeddingMixin:
         )

     async def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
-        global EMBEDDING_MODELS
-
         loaded_model = EMBEDDING_MODELS.get(model)
         if loaded_model is not None:
             return loaded_model

-        log.info(f"Loading sentence transformer for {model}...")
+        async with EMBEDDING_MODELS_LOCK:
+            loaded_model = EMBEDDING_MODELS.get(model)
+            if loaded_model is not None:
+                return loaded_model
+
+            log.info(f"Loading sentence transformer for {model}...")

-        def _load_model():
-            from sentence_transformers import SentenceTransformer
+            def _load_model():
+                from sentence_transformers import SentenceTransformer

-            platform_name = platform.system()
-            if platform_name == DARWIN:
-                # PyTorch's OpenMP kernels can segfault on macOS when spawned from background
-                # threads with the default parallel settings, so force a single-threaded CPU run.
-                log.debug(f"Constraining torch threads on {platform_name} to a single worker")
-                torch.set_num_threads(1)
+                platform_name = platform.system()
+                if platform_name == DARWIN:
+                    # PyTorch's OpenMP kernels can segfault on macOS when spawned from background
+                    # threads with the default parallel settings, so force a single-threaded CPU run.
+                    log.debug(f"Constraining torch threads on {platform_name} to a single worker")
+                    torch.set_num_threads(1)

-            return SentenceTransformer(model, trust_remote_code=True)
+                return SentenceTransformer(model, trust_remote_code=True)

-        loaded_model = await asyncio.to_thread(_load_model)
-        EMBEDDING_MODELS[model] = loaded_model
-        return loaded_model
+            loaded_model = await asyncio.to_thread(_load_model)
+            EMBEDDING_MODELS[model] = loaded_model
+            return loaded_model
llama_stack/providers/utils/memory/openai_vector_store_mixin.py CHANGED
@@ -122,6 +122,39 @@ class OpenAIVectorStoreMixin(ABC):
         # update in-memory cache
         self.openai_vector_stores[store_id] = store_info

+    async def _ensure_openai_metadata_exists(self, vector_store: VectorStore, name: str | None = None) -> None:
+        """
+        Ensure OpenAI-compatible metadata exists for a vector store.
+        """
+        if vector_store.identifier not in self.openai_vector_stores:
+            store_info = {
+                "id": vector_store.identifier,
+                "object": "vector_store",
+                "created_at": int(time.time()),
+                "name": name or vector_store.vector_store_name or vector_store.identifier,
+                "usage_bytes": 0,
+                "file_counts": VectorStoreFileCounts(
+                    cancelled=0,
+                    completed=0,
+                    failed=0,
+                    in_progress=0,
+                    total=0,
+                ).model_dump(),
+                "status": "completed",
+                "expires_after": None,
+                "expires_at": None,
+                "last_active_at": int(time.time()),
+                "file_ids": [],
+                "chunking_strategy": None,
+                "metadata": {
+                    "provider_id": vector_store.provider_id,
+                    "provider_vector_store_id": vector_store.provider_resource_id,
+                    "embedding_model": vector_store.embedding_model,
+                    "embedding_dimension": str(vector_store.embedding_dimension),
+                },
+            }
+            await self._save_openai_vector_store(vector_store.identifier, store_info)
+
     async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
         """Load all vector store metadata from persistent storage."""
         assert self.kvstore
llama_stack/providers/utils/tools/mcp.py CHANGED
@@ -4,6 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
+import hashlib
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
 from enum import Enum
@@ -73,6 +75,207 @@ class MCPProtol(Enum):
     SSE = 2


+class MCPSessionManager:
+    """Manages MCP session lifecycle within a request scope.
+
+    This class caches MCP sessions by (endpoint, headers_hash) to avoid redundant
+    connection establishment and tools/list calls when making multiple tool
+    invocations to the same MCP server within a single request.
+
+    Fix for GitHub issue #4452: MCP tools/list called redundantly before every
+    tool invocation.
+
+    Usage:
+        async with MCPSessionManager() as session_manager:
+            # Multiple tool calls will reuse the same session
+            result1 = await invoke_mcp_tool(..., session_manager=session_manager)
+            result2 = await invoke_mcp_tool(..., session_manager=session_manager)
+    """
+
+    def __init__(self):
+        # Cache of active sessions: key -> (session, client_context, session_context)
+        self._sessions: dict[str, tuple[ClientSession, Any, Any]] = {}
+        # Locks to prevent concurrent session creation for the same key
+        self._locks: dict[str, asyncio.Lock] = {}
+        # Global lock for managing the locks dict
+        self._global_lock = asyncio.Lock()
+
+    def _make_key(self, endpoint: str, headers: dict[str, str]) -> str:
+        """Create a cache key from endpoint and headers."""
+        # Sort headers for consistent hashing
+        headers_str = str(sorted(headers.items()))
+        headers_hash = hashlib.sha256(headers_str.encode()).hexdigest()[:16]
+        return f"{endpoint}:{headers_hash}"
+
+    async def _get_lock(self, key: str) -> asyncio.Lock:
+        """Get or create a lock for a specific cache key."""
+        async with self._global_lock:
+            if key not in self._locks:
+                self._locks[key] = asyncio.Lock()
+            return self._locks[key]
+
+    async def get_session(self, endpoint: str, headers: dict[str, str]) -> ClientSession:
+        """Get or create an MCP session for the given endpoint and headers.
+
+        Args:
+            endpoint: MCP server endpoint URL
+            headers: Headers including authorization
+
+        Returns:
+            An initialized ClientSession ready for tool calls
+        """
+        key = self._make_key(endpoint, headers)
+
+        # Check if session already exists (fast path)
+        if key in self._sessions:
+            session, _, _ = self._sessions[key]
+            return session
+
+        # Acquire lock for this specific key to prevent concurrent creation
+        lock = await self._get_lock(key)
+        async with lock:
+            # Double-check after acquiring lock
+            if key in self._sessions:
+                session, _, _ = self._sessions[key]
+                return session
+
+            # Create new session
+            session, client_ctx, session_ctx = await self._create_session(endpoint, headers)
+            self._sessions[key] = (session, client_ctx, session_ctx)
+            logger.debug(f"Created new MCP session for {endpoint} (key: {key[:32]}...)")
+            return session
+
+    async def _create_session(self, endpoint: str, headers: dict[str, str]) -> tuple[ClientSession, Any, Any]:
+        """Create a new MCP session.
+
+        Returns:
+            Tuple of (session, client_context, session_context) for lifecycle management
+        """
+        # Use the same protocol detection logic as client_wrapper
+        connection_strategies = [MCPProtol.STREAMABLE_HTTP, MCPProtol.SSE]
+        mcp_protocol = protocol_cache.get(endpoint, default=MCPProtol.UNKNOWN)
+        if mcp_protocol == MCPProtol.SSE:
+            connection_strategies = [MCPProtol.SSE, MCPProtol.STREAMABLE_HTTP]
+
+        last_exception: BaseException | None = None
+
+        for i, strategy in enumerate(connection_strategies):
+            try:
+                client = streamablehttp_client
+                if strategy == MCPProtol.SSE:
+                    client = cast(Any, sse_client)
+
+                # Enter the client context manager manually
+                client_ctx = client(endpoint, headers=headers)
+                client_streams = await client_ctx.__aenter__()
+
+                try:
+                    # Enter the session context manager manually
+                    session = ClientSession(read_stream=client_streams[0], write_stream=client_streams[1])
+                    session_ctx = session
+                    await session.__aenter__()
+
+                    try:
+                        await session.initialize()
+                        protocol_cache[endpoint] = strategy
+                        return session, client_ctx, session_ctx
+                    except BaseException:
+                        await session.__aexit__(None, None, None)
+                        raise
+                except BaseException:
+                    await client_ctx.__aexit__(None, None, None)
+                    raise

+            except* httpx.HTTPStatusError as eg:
+                for exc in eg.exceptions:
+                    err = cast(httpx.HTTPStatusError, exc)
+                    if err.response.status_code == 401:
+                        raise AuthenticationRequiredError(exc) from exc
+                if i == len(connection_strategies) - 1:
+                    raise
+                last_exception = eg
+            except* httpx.ConnectError as eg:
+                if i == len(connection_strategies) - 1:
+                    error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
+                    logger.error(f"MCP connection error: {error_msg}")
+                    raise ConnectionError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"failed to connect to MCP server at {endpoint} via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* httpx.TimeoutException as eg:
+                if i == len(connection_strategies) - 1:
+                    error_msg = f"MCP server at {endpoint} timed out"
+                    logger.error(f"MCP timeout error: {error_msg}")
+                    raise TimeoutError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"MCP server at {endpoint} timed out via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* httpx.RequestError as eg:
+                if i == len(connection_strategies) - 1:
+                    exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
+                    error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
+                    logger.error(f"MCP network error: {error_msg}")
+                    raise ConnectionError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"network error connecting to MCP server at {endpoint} via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* McpError:
+                if i < len(connection_strategies) - 1:
+                    logger.warning(
+                        f"failed to connect via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                    )
+                else:
+                    raise
+
+        # Should not reach here, but just in case
+        if last_exception:
+            raise last_exception
+        raise RuntimeError(f"Failed to create MCP session for {endpoint}")
+
+    async def close_all(self) -> None:
+        """Close all cached sessions.
+
+        Should be called at the end of a request to clean up resources.
+
+        Note: We catch BaseException (not just Exception) because:
+        1. CancelledError is a BaseException and can occur during cleanup
+        2. anyio cancel scope errors can occur if cleanup runs in a different
+           task context than where the session was created
+        These are expected in streaming response scenarios and are handled gracefully.
+        """
+        errors = []
+        session_count = len(self._sessions)
+        for key, (session, client_ctx, _) in list(self._sessions.items()):
+            try:
+                await session.__aexit__(None, None, None)
+            except BaseException as e:
+                # Debug level since these errors are expected in streaming scenarios
+                # where cleanup runs in a different async context than session creation
+                logger.debug(f"Error closing MCP session {key}: {e}")
+                errors.append(e)
+            try:
+                await client_ctx.__aexit__(None, None, None)
+            except BaseException as e:
+                logger.debug(f"Error closing MCP client context {key}: {e}")
+                errors.append(e)
+
+        self._sessions.clear()
+        self._locks.clear()
+        logger.debug(f"Closed {session_count} MCP sessions")
+
+        if errors:
+            logger.debug(f"Encountered {len(errors)} errors while closing MCP sessions (expected in streaming)")
+
+
 @asynccontextmanager
 async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
     # we use a ttl'd dict to cache the happy path protocol for each endpoint
@@ -151,6 +354,7 @@
     endpoint: str,
     headers: dict[str, str] | None = None,
     authorization: str | None = None,
+    session_manager: MCPSessionManager | None = None,
 ) -> ListToolDefsResponse:
     """List tools available from an MCP server.

@@ -158,6 +362,10 @@
         endpoint: MCP server endpoint URL
         headers: Optional base headers to include
         authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+        session_manager: Optional MCPSessionManager for session reuse within a request.
+            When provided, sessions are cached and reused, avoiding redundant session
+            creation when list_mcp_tools and invoke_mcp_tool are called for the same
+            server within a request. (Fix for #4452)

     Returns:
         List of tool definitions from the MCP server
@@ -169,7 +377,9 @@
     final_headers = prepare_mcp_headers(headers, authorization)

     tools = []
-    async with client_wrapper(endpoint, final_headers) as session:
+
+    # Helper function to process session and list tools
+    async def _list_tools_from_session(session):
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -183,15 +393,51 @@
                     },
                 )
             )
+
+    # If a session manager is provided, use it for session reuse (fix for #4452)
+    if session_manager is not None:
+        session = await session_manager.get_session(endpoint, final_headers)
+        await _list_tools_from_session(session)
+    else:
+        # Fallback to original behavior: create a new session for this call
+        async with client_wrapper(endpoint, final_headers) as session:
+            await _list_tools_from_session(session)
+
     return ListToolDefsResponse(data=tools)


+def _parse_mcp_result(result) -> ToolInvocationResult:
+    """Parse MCP tool call result into ToolInvocationResult.
+
+    Args:
+        result: The raw MCP tool call result
+
+    Returns:
+        ToolInvocationResult with parsed content
+    """
+    content: list[InterleavedContentItem] = []
+    for item in result.content:
+        if isinstance(item, mcp_types.TextContent):
+            content.append(TextContentItem(text=item.text))
+        elif isinstance(item, mcp_types.ImageContent):
+            content.append(ImageContentItem(image=_URLOrData(data=item.data)))
+        elif isinstance(item, mcp_types.EmbeddedResource):
+            logger.warning(f"EmbeddedResource is not supported: {item}")
+        else:
+            raise ValueError(f"Unknown content type: {type(item)}")
+    return ToolInvocationResult(
+        content=content,
+        error_code=1 if result.isError else 0,
+    )
+
+
 async def invoke_mcp_tool(
     endpoint: str,
     tool_name: str,
     kwargs: dict[str, Any],
     headers: dict[str, str] | None = None,
     authorization: str | None = None,
+    session_manager: MCPSessionManager | None = None,
 ) -> ToolInvocationResult:
     """Invoke an MCP tool with the given arguments.

@@ -201,6 +447,9 @@ async def invoke_mcp_tool(
         kwargs: Tool invocation arguments
         headers: Optional base headers to include
         authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+        session_manager: Optional MCPSessionManager for session reuse within a request.
+            When provided, sessions are cached and reused for multiple tool calls to
+            the same endpoint, avoiding redundant tools/list calls. (Fix for #4452)

     Returns:
         Tool invocation result with content and error information
@@ -211,20 +460,13 @@
     # Prepare headers with authorization handling
     final_headers = prepare_mcp_headers(headers, authorization)

-    async with client_wrapper(endpoint, final_headers) as session:
+    # If a session manager is provided, use it for session reuse (fix for #4452)
+    if session_manager is not None:
+        session = await session_manager.get_session(endpoint, final_headers)
         result = await session.call_tool(tool_name, kwargs)
+        return _parse_mcp_result(result)

-        content: list[InterleavedContentItem] = []
-        for item in result.content:
-            if isinstance(item, mcp_types.TextContent):
-                content.append(TextContentItem(text=item.text))
-            elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
-            elif isinstance(item, mcp_types.EmbeddedResource):
-                logger.warning(f"EmbeddedResource is not supported: {item}")
-            else:
-                raise ValueError(f"Unknown content type: {type(item)}")
-        return ToolInvocationResult(
-            content=content,
-            error_code=1 if result.isError else 0,
-        )
+    # Fallback to original behavior: create a new session for each call
+    async with client_wrapper(endpoint, final_headers) as session:
+        result = await session.call_tool(tool_name, kwargs)
+        return _parse_mcp_result(result)
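Outside the Responses implementation, the same session manager can be passed explicitly. A minimal sketch modeled on the finally-block cleanup in openai_responses.py above; the endpoint URL and tool name are placeholders:

    from llama_stack.providers.utils.tools.mcp import (
        MCPSessionManager,
        invoke_mcp_tool,
        list_mcp_tools,
    )

    async def run_two_calls() -> None:
        session_manager = MCPSessionManager()
        try:
            # First call establishes the session and caches it by (endpoint, headers hash).
            tools = await list_mcp_tools(
                endpoint="http://localhost:8000/mcp",  # placeholder endpoint
                session_manager=session_manager,
            )
            print(len(tools.data))
            # Second call reuses the cached session instead of re-running initialize/tools-list.
            result = await invoke_mcp_tool(
                endpoint="http://localhost:8000/mcp",
                tool_name="echo",  # placeholder tool name
                kwargs={},
                session_manager=session_manager,
            )
            print(result.error_code)
        finally:
            # Always release the cached sessions at the end of the request.
            await session_manager.close_all()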
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.2
+Version: 0.4.3
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -46,7 +46,7 @@ Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
 Requires-Dist: urllib3>=2.6.3
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.2; extra == "client"
+Requires-Dist: llama-stack-client==0.4.3; extra == "client"
 Dynamic: license-file

 # Llama Stack
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/RECORD CHANGED
@@ -30,11 +30,11 @@ llama_stack/core/distribution.py,sha256=pASA0KJ_KTTRQbWP0H5OSvp1ZFQvprbMfE6OLBTq
 llama_stack/core/external.py,sha256=_UTuHkqMzDM07CMAGcPeschNm8NfMkr63iSrLLYc5lg,1869
 llama_stack/core/id_generation.py,sha256=HyTuFALhY0FTpHrYSZbykpPgoNShBeKfFexJz5xEFU8,1185
 llama_stack/core/inspect.py,sha256=0L_Em3kYuvqLQFa4Nn-47rzQtpzhuuWtcnpixCfQuv4,7250
-llama_stack/core/library_client.py,sha256=V5f7apz0heD5DyExwNXiEN0E5xGyQh279BeuVSSzclQ,21380
+llama_stack/core/library_client.py,sha256=Lk1uNUCgnbx8WsCPmtVcwyjwcyU20_CH-hStAR93Fc0,24156
 llama_stack/core/providers.py,sha256=EblMlsWJKGHsXCTmVo-doCJ64JEpBy7-2DoupFkaTUo,5134
 llama_stack/core/request_headers.py,sha256=tUt-RvzUrl7yxbYKBe7nN5YBCgWxShz4cemLvl7XGxc,3692
 llama_stack/core/resolver.py,sha256=IRPPwi60uAe5mlj-NjAR41laP9Dp1WvAI3A-bTMB-mk,19383
-llama_stack/core/stack.py,sha256=YWk2opmFtsYmyEvjrUzDGhLkGV3SNN-omr_eVZKsS-8,27944
+llama_stack/core/stack.py,sha256=cPvwjgldE3L9fEQEReKIIOtHNWEUtUtZx6DL17zIi34,28588
 llama_stack/core/start_stack.sh,sha256=3snlFzur13NS1_UnJQ6t8zK7R5DCRFJKJrz9YTJmWVA,2834
 llama_stack/core/testing_context.py,sha256=TIWetol6Sb2BSiqkq5X0knb0chG03GSpmjByFwVfY60,1438
 llama_stack/core/access_control/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
@@ -54,13 +54,13 @@ llama_stack/core/routers/tool_runtime.py,sha256=bRfPMlLxtdRQ7ad5BPTetw7fi_QT_xV5
 llama_stack/core/routers/vector_io.py,sha256=QqloV8ljTAvjvAytWRBCPnr5Aqm3LidLXalLxDh0W54,21972
 llama_stack/core/routing_tables/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/core/routing_tables/benchmarks.py,sha256=hxHRQyk8MRVBpdLYIjy4Cim28sdAbPjf9tuZ0msddMg,2552
-llama_stack/core/routing_tables/common.py,sha256=DI7OQvadW9yeV1LVg0FP4d4p0JksbwJVYuVioXzkyQw,10353
+llama_stack/core/routing_tables/common.py,sha256=1jPbybKPYQAllN9HeN5EKxCrLeVHG1kGvRfk2Uy3aQM,10847
 llama_stack/core/routing_tables/datasets.py,sha256=xWhcVUg6TuIcBMRbcYf8_keNc5PADv8PfVEBWmbabJ4,3025
 llama_stack/core/routing_tables/models.py,sha256=PyEYEHlB9uarnSpr6Mnw-bVDOPrANMO06I0dl2orbPg,11388
 llama_stack/core/routing_tables/scoring_functions.py,sha256=gICo9oTTwqT3Ifrbi84wmdcLf2ipiaU3RCoPO6IBt-Y,2464
 llama_stack/core/routing_tables/shields.py,sha256=KjfusJknyZ28evzhdJJLaOuiMJmjkzEPuilIlLH6AcY,2209
 llama_stack/core/routing_tables/toolgroups.py,sha256=X-Uh78mFtfhIpt1RMKHgaam3PD-hWzvmHu0YQj_EEWk,5602
-llama_stack/core/routing_tables/vector_stores.py,sha256=eUvzzWD6Nx9gTFW4ziMR6hGccYRhBKRQYOxbly2knWA,12363
+llama_stack/core/routing_tables/vector_stores.py,sha256=ZLv1SGh_Z2N6eZjCp9_HV5L5nA_b8sio3XeZelP3JdQ,12560
 llama_stack/core/server/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/core/server/auth.py,sha256=dCyBRg72nplOhKx0xvIiwcptVQmK6140VVCtGjSsvhw,7768
 llama_stack/core/server/auth_providers.py,sha256=1b9eBmvaz2aBQlNkkODaEQHYIF6WejS4bjc3gyr8sg8,20876
@@ -72,19 +72,19 @@ llama_stack/core/storage/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZh
 llama_stack/core/storage/datatypes.py,sha256=WZqYV_Pgy2nKwEEgjJR1_pST0VUHqY_rnPaWZ8-bjIU,9810
 llama_stack/core/storage/kvstore/__init__.py,sha256=uTrLgzQMAgEUWthwT5LUfHnPAp41Std0zJzlTtPgP-A,309
 llama_stack/core/storage/kvstore/config.py,sha256=XSPhkugrvA9hxBc8DXclD2ktZzNGTQypIbwoH3FXGmI,1436
-llama_stack/core/storage/kvstore/kvstore.py,sha256=Ux5Q8g4fmji4nqKa6QMHQ-gVvhYwn0Jgnmeh0C1Oq8I,4291
+llama_stack/core/storage/kvstore/kvstore.py,sha256=q1LNtQrAxlBBSuVVl6jnyd7v0JHcDpS_7U2h2QmWwtU,4601
 llama_stack/core/storage/kvstore/mongodb/__init__.py,sha256=1LZfkl1Mo3ObnKm7s8619xYiaEZzy95o8pvEifawBQ4,275
-llama_stack/core/storage/kvstore/mongodb/mongodb.py,sha256=oJqt6NKhQZpoaoW0ZSl9iq8WLHmP2SqjguMCaBX2ogk,3453
+llama_stack/core/storage/kvstore/mongodb/mongodb.py,sha256=U8gCCzGhM0KzinwVa9huFzDNZgyVZ4TH7RmHCpH5RJc,3579
 llama_stack/core/storage/kvstore/postgres/__init__.py,sha256=JPP4RZZRUMKOnh9hsnB1mxQkftVfLvo9l2QcXZlUHbY,262
-llama_stack/core/storage/kvstore/postgres/postgres.py,sha256=-d3NQy5DBScMVswYG2lv-p5p74oErOxxdlJlQBsWDQI,4435
+llama_stack/core/storage/kvstore/postgres/postgres.py,sha256=ohEcChgHBjBvkOzIBBtV7Kp1N8kL9CzWRSdSW2-zmMw,4648
 llama_stack/core/storage/kvstore/redis/__init__.py,sha256=FGi8DzDTHkWJufWWoJAXW2zjs2Ji5nsha2cGF84HZis,251
-llama_stack/core/storage/kvstore/redis/redis.py,sha256=6eFkcmVjFCVsSYZ9IGhSWBihzS2UMZhZYKuuoIDPR7A,3517
+llama_stack/core/storage/kvstore/redis/redis.py,sha256=pA41GajiVle-8rbSq1SNExcIdexTpSnk_1vE6zydlfM,3649
 llama_stack/core/storage/kvstore/sqlite/__init__.py,sha256=tnKA5tuANI8w1wIJhhyTYpV5NUxyl5XypjdiHnr0PaY,253
 llama_stack/core/storage/kvstore/sqlite/sqlite.py,sha256=Aecv8Jk8XPOYB_yoU_XkjYUfxqqv2pO7xhVp6F_Urdg,7234
 llama_stack/core/storage/sqlstore/__init__.py,sha256=m-8Cg5fNJnxWZw2KVAHlkYPpGuMcrQliGWiYBQChl3Y,493
 llama_stack/core/storage/sqlstore/authorized_sqlstore.py,sha256=-HWmTl_rQx_97DhFQPx1mREXJr8X3IyplqrpELdEgVc,14474
-llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py,sha256=wcqAyT5Bzl5FzdgP7lzGEdsEBG__lbACyw6QzFTfd68,14905
-llama_stack/core/storage/sqlstore/sqlstore.py,sha256=Q3mi-Piv9e8trY6O5z5cQ-izbwcELCLSuWFoR-Y0T98,2980
+llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py,sha256=J3pNnZUpUoU3IhPmQpHbLMK7BXguPnCKjQZCYR_wayQ,15248
+llama_stack/core/storage/sqlstore/sqlstore.py,sha256=rGuYGZerCrZZ_G8vMbeP5_ef4jnMFAcXFv-8jniNjno,3228
 llama_stack/core/store/__init__.py,sha256=6rFevukvm0CQxaf3uV3lr5vJTZRB-igWXK2Bd32SNsk,244
 llama_stack/core/store/registry.py,sha256=QSoJYCZ3VjHN3NkqECYGwvv0FJpozuh_9k22Lt_y2tY,7377
 llama_stack/core/utils/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
@@ -200,9 +200,9 @@ llama_stack/providers/inline/agents/meta_reference/agents.py,sha256=Q1fqMRZ2Yfb4
 llama_stack/providers/inline/agents/meta_reference/config.py,sha256=OY82xSuMfh8bMDfV7BZO4P-MMzzjUaMxOayNn4hNPj4,1375
 llama_stack/providers/inline/agents/meta_reference/safety.py,sha256=e5taz2bA9Ks3nlRgLp0CGHsgU_Rv_6ed4bGemQlGEv8,1684
 llama_stack/providers/inline/agents/meta_reference/responses/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py,sha256=uLC2Ht9PAsVygAs_p7jRAHxPzm0ZmWFb9k1sU04tNq8,23695
-llama_stack/providers/inline/agents/meta_reference/responses/streaming.py,sha256=PahZY5hfQYJPkuHPe9GKz0FyGawn-AS51zz4LF4CktI,70087
-llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py,sha256=ga42XVUEQNzvjTLvzvBQpRleU-mvfklUdt8dg08Nid4,22632
+llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py,sha256=TPuSd7g6sA95b6-oR4ailQ_v6Te7HNqpcVzLFwEFwtU,25192
+llama_stack/providers/inline/agents/meta_reference/responses/streaming.py,sha256=PsHByA2JUBU2KESWXpHq4rBfV4wMBwZ4JMSLZR_kxns,70310
+llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py,sha256=2aEeqhWP8kCEubFwj9zFj_QzfXD1MssHDedhjvuMZLs,23202
 llama_stack/providers/inline/agents/meta_reference/responses/types.py,sha256=VFUUSbaH15cHzy72pYcxDfuEdKbUxT3F3jg5X-42kbE,8721
 llama_stack/providers/inline/agents/meta_reference/responses/utils.py,sha256=yUGkUZcitTmb6mePDK8Zvw2FDB5FIvxVCx0b887Gqiw,22977
 llama_stack/providers/inline/batches/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
@@ -464,7 +464,7 @@ llama_stack/providers/remote/vector_io/milvus/config.py,sha256=ZRf00ifVSEC5te8Ww
 llama_stack/providers/remote/vector_io/milvus/milvus.py,sha256=epkA43GUBu6u5-uGw1Dk-MNcETTjve4GCGo52p46wnY,16406
 llama_stack/providers/remote/vector_io/pgvector/__init__.py,sha256=yAFgSkT7gwkRtO-xWtm4nkP99QQxZAxsynDUEybIf9Y,564
 llama_stack/providers/remote/vector_io/pgvector/config.py,sha256=yMVFQf4fJDev8ShnbnOicXnLF9r45MIxCKYcbmDFyfw,1540
-llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=UwGoaSFgPOJGp6jdWdMcB5ZSGpeP7RP13TTUfk1wD2U,19601
+llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=LD9SsKHexAg438rBo6Ro479aAWDb1U5o_zTObjr3GI8,20040
 llama_stack/providers/remote/vector_io/qdrant/__init__.py,sha256=Tim4k1BhOROl5BCHuS1YzH6_MilgNdNrv1qe4zFcla4,554
 llama_stack/providers/remote/vector_io/qdrant/config.py,sha256=4akCc4YbYYIBCENj8NRUm3OI-ix0zTOmbgq8RG3nIWU,1115
 llama_stack/providers/remote/vector_io/qdrant/qdrant.py,sha256=VlA-y7F52LC4paHEV6BRQyxWAdBBzh0gWH1hUUs7JMQ,15404
@@ -485,7 +485,7 @@ llama_stack/providers/utils/datasetio/url_utils.py,sha256=Jhxw-bRs5PbkG-pCSRKqoK
 llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/files/form_data.py,sha256=-yRXfeSf3AfyRShz6HiSlTPFSa1y4qdlRDxwQOLMK40,2284
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
-llama_stack/providers/utils/inference/embedding_mixin.py,sha256=CQ2vvLM1Y6FJcYXdWO5zxVjHud43-W4CjqqdTM47kns,3325
+llama_stack/providers/utils/inference/embedding_mixin.py,sha256=n94bHl0YItWOpRhrCstnz0tr-ZABEJ3DTEGOyWD4t_4,3592
 llama_stack/providers/utils/inference/inference_store.py,sha256=DG_W3FCwA6fjpz9GGko_ylm-4-kNBkxltcXNKOW52OI,9944
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=MDxQi5KZ1fIjxdpDhvcA7_9Rvu3HNrSQWTTqLrOkUII,13621
 llama_stack/providers/utils/inference/model_registry.py,sha256=CEanQgkbESbAxwczpXQnPHjSa9pGzQBq43tF8t7S9kk,8157
@@ -495,7 +495,7 @@ llama_stack/providers/utils/inference/prompt_adapter.py,sha256=qE2y1EpY0zfYAN6We
 llama_stack/providers/utils/inference/stream_utils.py,sha256=WdM3SPMh9xfOAcpd67_Ld0AaNKrvoYIdJ4nrFOTMmp8,675
 llama_stack/providers/utils/memory/__init__.py,sha256=pA4yikPZUO-A0K2nscz5tEp1yYSBtvglbgC5pe-FGKE,214
 llama_stack/providers/utils/memory/file_utils.py,sha256=MsjispuPO0cMXmRqAoTJ-dwM9uzgYn4aiRFBM-aHP9w,712
-llama_stack/providers/utils/memory/openai_vector_store_mixin.py,sha256=8nq_Nj_pLpznlf7YbsZAHeSucH1FaHD3IILfNoquwpo,58620
+llama_stack/providers/utils/memory/openai_vector_store_mixin.py,sha256=7yVvuq_uc9DxOd7SLqcJOVmu2c0AkBGmPmXZDaM1TgQ,60115
 llama_stack/providers/utils/memory/vector_store.py,sha256=HiNCtZ4OzvPk4RGuRNks7CnbrDoppYyrJdjST-emnZQ,11941
 llama_stack/providers/utils/responses/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/responses/responses_store.py,sha256=4ziPwlqxMS-mrlB2rL2M8LU9sYewmInH9zN5WPHK00U,10397
@@ -504,7 +504,7 @@ llama_stack/providers/utils/scoring/aggregation_utils.py,sha256=S2XR7DLXEVf7FCTO
 llama_stack/providers/utils/scoring/base_scoring_fn.py,sha256=Upd-Hu8ftoZArfAuHx00uDkvEtmVKWg8U9tgkArro4o,4098
 llama_stack/providers/utils/scoring/basic_scoring_utils.py,sha256=JmGA65N55raHR7rmcdWdTQPaZy4X7I69KFDvfN6716A,714
 llama_stack/providers/utils/tools/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/utils/tools/mcp.py,sha256=zjGAkmPRRcNALfp9uWaTNKl0WRaCTf7jrASY-Ka1-SA,9315
+llama_stack/providers/utils/tools/mcp.py,sha256=0P0VQgrx7WWoomrmi-1QZFSMlb8DTDLxx8u8TJgMH84,20244
 llama_stack/providers/utils/tools/ttl_dict.py,sha256=4Bv3Nri9HM2FSckfaJJbqzICpO2S_yOXcsgVj_yvsoA,2021
 llama_stack/providers/utils/vector_io/__init__.py,sha256=fGP7xUTCZ3E77v3FtEuGyW2k3S5Tn9x0Kk1aEIafoxA,552
 llama_stack/providers/utils/vector_io/vector_utils.py,sha256=l1asZcxbtlRIaZUi_LbXagclCAveD-k6w28LfOZwqBk,7147
@@ -513,7 +513,7 @@ llama_stack/telemetry/constants.py,sha256=LtXE61xwNL3cBYZXKcXcbwD_Uh1jazP3V8a0od
 llama_stack/telemetry/helpers.py,sha256=7uarMIHL5ngOUXQZxkH96corFxE7Jk5JaizRQ8Z8Ok0,1694
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=oGGTrzzBYNNvOIcvcFZenNPthr0yziJ7hlGPtckx460,39240
-llama_stack-0.4.2.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.4.3.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
 llama_stack_api/__init__.py,sha256=5XNQGpundjXTutLgnYp6B1t6KITWXH_of626GciNma4,28103
 llama_stack_api/agents.py,sha256=u0sg3AoWCip5o8T4DMTM8uqP3BsdbkKbor3PmxKTg0g,7143
 llama_stack_api/connectors.py,sha256=PcAwndbVQC6pm5HGSlNprqYFTZzhCM7SYHPyRkSIoaQ,4644
@@ -537,7 +537,7 @@ llama_stack_api/scoring_functions.py,sha256=0lP_ZENUh12i12ibg-_XNNPKLHi_TvB8H5Ly
 llama_stack_api/shields.py,sha256=9dNMyTVL0xcR8_BXCHb_zuAJC7Cz8pX8htRwW2-EDSw,2823
 llama_stack_api/tools.py,sha256=eCyZx806VfpBJgsuJF9R3urA8ljF3g0kLapNpx9YRzY,7518
 llama_stack_api/vector_io.py,sha256=3tYy8xLhVvx_rMtfi5Pxv0GwTMm1TfMYwq82tFqRz1U,36517
-llama_stack_api/vector_stores.py,sha256=DMkwPSg05VJOvBJrVlwFU6EHBQEmarADhIzzgt1jjwE,1709
+llama_stack_api/vector_stores.py,sha256=mILSO3k2X-Hg4G3YEdq54fKAenCuAzRAXqpNg-_D_Ng,1832
 llama_stack_api/version.py,sha256=V3jdW3iFPdfOt4jWzJA-di7v0zHLYsn11hNtRzkY7uQ,297
 llama_stack_api/admin/__init__.py,sha256=VnJn9fbk-dFkRrm1P5UWlAOcZDA2jf6dx9W5nt-WgOY,1049
 llama_stack_api/admin/api.py,sha256=m14f4iBUJf-G0qITj66o-TFKCSUiD9U12XRnZ1Slr_w,1961
@@ -575,8 +575,8 @@ llama_stack_api/inspect_api/api.py,sha256=XkdM7jJ3_UlEIE4woEVi5mO2O1aNn9_FPtb18N
 llama_stack_api/inspect_api/fastapi_routes.py,sha256=I7R8roy6einYDzrPN8wNjrRokpoSNZi9zrtmLHS1vDw,2575
 llama_stack_api/inspect_api/models.py,sha256=EW69EHkOG8i0GS8KW8Kz6WaPZV74hzwad8dGXWrrKhs,683
 llama_stack_api/internal/__init__.py,sha256=hZiF7mONpu54guvMUTW9XpfkETUO55u6hqYOYkz8Bt0,307
-llama_stack_api/internal/kvstore.py,sha256=J_lFhhlFcg9uCyn6J758qWSbMIW5nvcfvB66kkitF8g,790
-llama_stack_api/internal/sqlstore.py,sha256=IMOmHiNpxrjqvYNmcsdxbGDUdnMvviFo8AlmT9P27IQ,2219
+llama_stack_api/internal/kvstore.py,sha256=mgNJz6r8_ju3I3JT2Pz5fSX_9DLv_OupsS2NnJe3usY,833
+llama_stack_api/internal/sqlstore.py,sha256=FBIQhG7VOVMMSTe24uMigfxEWXnarY0hzx9HjrNXVnI,2262
 llama_stack_api/llama_stack_api/__init__.py,sha256=5XNQGpundjXTutLgnYp6B1t6KITWXH_of626GciNma4,28103
 llama_stack_api/llama_stack_api/agents.py,sha256=u0sg3AoWCip5o8T4DMTM8uqP3BsdbkKbor3PmxKTg0g,7143
 llama_stack_api/llama_stack_api/connectors.py,sha256=PcAwndbVQC6pm5HGSlNprqYFTZzhCM7SYHPyRkSIoaQ,4644
@@ -600,7 +600,7 @@ llama_stack_api/llama_stack_api/scoring_functions.py,sha256=0lP_ZENUh12i12ibg-_X
 llama_stack_api/llama_stack_api/shields.py,sha256=9dNMyTVL0xcR8_BXCHb_zuAJC7Cz8pX8htRwW2-EDSw,2823
 llama_stack_api/llama_stack_api/tools.py,sha256=eCyZx806VfpBJgsuJF9R3urA8ljF3g0kLapNpx9YRzY,7518
 llama_stack_api/llama_stack_api/vector_io.py,sha256=3tYy8xLhVvx_rMtfi5Pxv0GwTMm1TfMYwq82tFqRz1U,36517
-llama_stack_api/llama_stack_api/vector_stores.py,sha256=DMkwPSg05VJOvBJrVlwFU6EHBQEmarADhIzzgt1jjwE,1709
+llama_stack_api/llama_stack_api/vector_stores.py,sha256=mILSO3k2X-Hg4G3YEdq54fKAenCuAzRAXqpNg-_D_Ng,1832
 llama_stack_api/llama_stack_api/version.py,sha256=V3jdW3iFPdfOt4jWzJA-di7v0zHLYsn11hNtRzkY7uQ,297
 llama_stack_api/llama_stack_api/admin/__init__.py,sha256=VnJn9fbk-dFkRrm1P5UWlAOcZDA2jf6dx9W5nt-WgOY,1049
 llama_stack_api/llama_stack_api/admin/api.py,sha256=m14f4iBUJf-G0qITj66o-TFKCSUiD9U12XRnZ1Slr_w,1961
@@ -638,8 +638,8 @@ llama_stack_api/llama_stack_api/inspect_api/api.py,sha256=XkdM7jJ3_UlEIE4woEVi5m
 llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py,sha256=I7R8roy6einYDzrPN8wNjrRokpoSNZi9zrtmLHS1vDw,2575
 llama_stack_api/llama_stack_api/inspect_api/models.py,sha256=EW69EHkOG8i0GS8KW8Kz6WaPZV74hzwad8dGXWrrKhs,683
 llama_stack_api/llama_stack_api/internal/__init__.py,sha256=hZiF7mONpu54guvMUTW9XpfkETUO55u6hqYOYkz8Bt0,307
-llama_stack_api/llama_stack_api/internal/kvstore.py,sha256=J_lFhhlFcg9uCyn6J758qWSbMIW5nvcfvB66kkitF8g,790
-llama_stack_api/llama_stack_api/internal/sqlstore.py,sha256=IMOmHiNpxrjqvYNmcsdxbGDUdnMvviFo8AlmT9P27IQ,2219
+llama_stack_api/llama_stack_api/internal/kvstore.py,sha256=mgNJz6r8_ju3I3JT2Pz5fSX_9DLv_OupsS2NnJe3usY,833
+llama_stack_api/llama_stack_api/internal/sqlstore.py,sha256=FBIQhG7VOVMMSTe24uMigfxEWXnarY0hzx9HjrNXVnI,2262
 llama_stack_api/llama_stack_api/providers/__init__.py,sha256=a_187ghsdPNYJ5xLizqKYREJJLBa-lpcIhLp8spgsH8,841
 llama_stack_api/llama_stack_api/providers/api.py,sha256=ytwxri9s6p8j9ClFKgN9mfa1TF0VZh1o8W5cVZR49rc,534
 llama_stack_api/llama_stack_api/providers/fastapi_routes.py,sha256=jb1yrXEk1MdtcgWCToSZtaB-wjKqv5uVKIkvduXoKlM,1962
@@ -648,8 +648,8 @@ llama_stack_api/providers/__init__.py,sha256=a_187ghsdPNYJ5xLizqKYREJJLBa-lpcIhL
 llama_stack_api/providers/api.py,sha256=ytwxri9s6p8j9ClFKgN9mfa1TF0VZh1o8W5cVZR49rc,534
 llama_stack_api/providers/fastapi_routes.py,sha256=jb1yrXEk1MdtcgWCToSZtaB-wjKqv5uVKIkvduXoKlM,1962
 llama_stack_api/providers/models.py,sha256=nqBzh9je_dou35XFjYGD43hwKgjWy6HIRmGWUrcGqOw,653
-llama_stack-0.4.2.dist-info/METADATA,sha256=otXCJCHPmU0tTV9dG5BsYWmxgbWqrlfeicQgZx5OlCc,12464
-llama_stack-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llama_stack-0.4.2.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
-llama_stack-0.4.2.dist-info/top_level.txt,sha256=pyNYneZU5w62BaExic-GC1ph5kk8JI2mJFwzqiZy2cU,28
-llama_stack-0.4.2.dist-info/RECORD,,
+llama_stack-0.4.3.dist-info/METADATA,sha256=i3L_soHo8hFMex4qrg30BHHC4f79XoW3jdR3Zza76Yk,12464
+llama_stack-0.4.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+llama_stack-0.4.3.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.4.3.dist-info/top_level.txt,sha256=pyNYneZU5w62BaExic-GC1ph5kk8JI2mJFwzqiZy2cU,28
+llama_stack-0.4.3.dist-info/RECORD,,
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

llama_stack_api/internal/kvstore.py CHANGED
@@ -22,5 +22,7 @@ class KVStore(Protocol):

     async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...

+    async def shutdown(self) -> None: ...
+

 __all__ = ["KVStore"]
llama_stack_api/internal/sqlstore.py CHANGED
@@ -75,5 +75,7 @@ class SqlStore(Protocol):
         nullable: bool = True,
     ) -> None: ...

+    async def shutdown(self) -> None: ...
+

 __all__ = ["ColumnDefinition", "ColumnType", "SqlStore"]
llama_stack_api/llama_stack_api/internal/kvstore.py CHANGED
@@ -22,5 +22,7 @@ class KVStore(Protocol):

     async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...

+    async def shutdown(self) -> None: ...
+

 __all__ = ["KVStore"]
llama_stack_api/llama_stack_api/internal/sqlstore.py CHANGED
@@ -75,5 +75,7 @@ class SqlStore(Protocol):
         nullable: bool = True,
     ) -> None: ...

+    async def shutdown(self) -> None: ...
+

 __all__ = ["ColumnDefinition", "ColumnType", "SqlStore"]
llama_stack_api/llama_stack_api/vector_stores.py CHANGED
@@ -42,6 +42,7 @@ class VectorStoreInput(BaseModel):
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
     :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    :param vector_store_name: (Optional) Human-readable name for the vector store
     """

     vector_store_id: str
@@ -49,3 +50,4 @@ class VectorStoreInput(BaseModel):
     embedding_dimension: int
     provider_id: str | None = None
     provider_vector_store_id: str | None = None
+    vector_store_name: str | None = None
llama_stack_api/vector_stores.py CHANGED
@@ -42,6 +42,7 @@ class VectorStoreInput(BaseModel):
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
     :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    :param vector_store_name: (Optional) Human-readable name for the vector store
     """

     vector_store_id: str
@@ -49,3 +50,4 @@ class VectorStoreInput(BaseModel):
     embedding_dimension: int
     provider_id: str | None = None
     provider_vector_store_id: str | None = None
+    vector_store_name: str | None = None
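For registration code built on VectorStoreInput, the new field is just another optional attribute. A hedged sketch; the identifier, embedding model, and dimension values are illustrative only:

    from llama_stack_api.vector_stores import VectorStoreInput

    vs = VectorStoreInput(
        vector_store_id="docs",                     # illustrative identifier
        embedding_model="all-MiniLM-L6-v2",         # illustrative embedding model
        embedding_dimension=384,
        vector_store_name="Product documentation",  # new in 0.4.3
    )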